Esempio n. 1
0
    def get_text(self):
        """Assemble the ``<body>`` markup for every item in the book's spine.

        Each spine item is passed through ``self.dump_text`` and the pieces
        are concatenated between ``<body>`` and ``</body>``.  ``<section>``
        tags are opened depending on ``self.opts.sectionize`` and on the
        lookup in ``self.toc``; ``self.section_level`` counts the sections
        currently open and is not positive once this method returns.

        Returns:
            The joined markup as a single string.
        """
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer

        open_tag, close_tag = '<section>', '</section>'
        parts = ['<body>']

        # No sectioning requested: wrap the whole body in one main section.
        if self.opts.sectionize == 'nothing':
            parts.append(open_tag)
            self.section_level += 1

        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)

            # A page gets its own section when sectionizing per file, or
            # when the TOC references this page.
            wrap_page = (self.opts.sectionize == 'files'
                         or None in self.toc.get(item.href, ()))
            if wrap_page:
                parts.append(open_tag)
                self.section_level += 1

            parts.extend(self.dump_text(item.data.find(XHTML('body')), stylizer, item))

            if wrap_page:
                parts.append(close_tag)
                self.section_level -= 1

        # Emit a closing tag for every section that is still open.
        unclosed = max(self.section_level, 0)
        parts.extend([close_tag] * unclosed)
        self.section_level -= unclosed

        parts.append('</body>')
        return ''.join(parts)
Esempio n. 2
0
 def stylize_spine(self):
     """Build a Stylizer for every item in ``self.items``.

     Before each Stylizer is created the item's ``<body>`` style attribute
     is rewritten: a ``style`` attribute on the root element is merged into
     it, vertical margins and horizontal padding are zeroed, and margin,
     page-break, justification and font-family declarations are appended
     according to ``self.context`` and the instance settings.

     The result is stored in ``self.stylizers``, keyed by item.
     """
     self.stylizers = {}
     profile = self.context.source
     extra_css = ''
     for item in self.items:
         root = item.data
         body = root.find(XHTML('body'))

         # Move any style set on the root element onto <body>, placing it
         # in front of whatever <body> already declared.
         if 'style' in root.attrib:
             existing = body.attrib.get('style', '')
             body.set('style',  root.get('style') + ';' + existing)
             del root.attrib['style']

         declarations = body.get('style', '').split(';')
         declarations += ['margin-top: 0pt', 'margin-bottom: 0pt']

         # Negative margins in the context mean "do not override".
         left = float(self.context.margin_left)
         if left >= 0:
             declarations.append('margin-left : %gpt' % left)
         right = float(self.context.margin_right)
         if right >= 0:
             declarations.append('margin-right : %gpt' % right)

         declarations += ['padding-left: 0pt', 'padding-right: 0pt']
         if self.page_break_on_body:
             declarations.append('page-break-before: always')
         justification = self.context.change_justification
         if justification != 'original':
             declarations.append('text-align: ' + justification)
         if self.body_font_family:
             declarations.append('font-family: ' + self.body_font_family)
         body.set('style', '; '.join(declarations))

         self.stylizers[item] = Stylizer(
             root, item.href, self.oeb, self.context, profile,
             user_css=self.context.extra_css, extra_css=extra_css)
Esempio n. 3
0
 def stylizer(self, item):
     """Return the Stylizer for *item*, creating and caching it if needed.

     Lookups go through ``self.stylizer_cache``; a missing entry (or one
     stored as ``None``) causes a new Stylizer to be built and stored.
     """
     cached = self.stylizer_cache.get(item)
     if cached is not None:
         return cached
     built = Stylizer(item.data, item.href, self.oeb, self.opts,
                      self.profile, base_css=self.base_css)
     self.stylizer_cache[item] = built
     return built
Esempio n. 4
0
    def mlize(self):
        """Convert ``self.item`` into one ``snbc`` element tree per sub-item.

        The item's ``<body>`` is dumped to marked-up text via
        ``self.dump_text`` and cleaned with ``self.cleanup_text``.  The text
        is then read line by line: lines starting with
        ``CALIBRE_SNB_BM_TAG`` switch the sub-item that receives subsequent
        content, lines starting with ``CALIBRE_SNB_IMG_TAG`` become ``img``
        elements, lines containing ``CALIBRE_SNB_PRE_TAG`` are stored
        verbatim, and every other non-empty line becomes a ``text`` element.

        Returns:
            dict mapping each key from ``self.subitems`` to the root
            ``snbc`` element built for it.
        """
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.utils.xml_parse import safe_xml_fromstring

        stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book,
                            self.opts, self.opts.output_profile)
        body_markup = etree.tostring(self.item.data.find(XHTML('body')),
                                     encoding='unicode')

        # Skeleton per sub-item: <snbc><head><title/>...</head><body/></snbc>
        trees = {}
        for key, title in self.subitems:
            root = etree.Element("snbc")
            head = etree.SubElement(root, "head")
            etree.SubElement(head, "title").text = title
            if self.opts and self.opts.snb_hide_chapter_name:
                etree.SubElement(head, "hidetitle").text = "true"
            etree.SubElement(root, "body")
            trees[key] = root

        # The text starts with a bookmark line carrying an empty name.
        pieces = [u'', '%s%s\n\n' % (CALIBRE_SNB_BM_TAG, "")]
        pieces += self.dump_text(self.subitems,
                                 safe_xml_fromstring(body_markup),
                                 stylizer)[0]
        flat_text = self.cleanup_text(''.join(pieces))

        # Content is appended to the '' sub-item until a bookmark line
        # selects another one.
        target = trees[''].find(".//body")
        for raw_line in flat_text.splitlines():
            pre_at = raw_line.find(CALIBRE_SNB_PRE_TAG)
            if pre_at != -1:
                # Preformatted: keep everything after the tag untouched.
                verbatim = etree.CDATA(
                    raw_line[pre_at + len(CALIBRE_SNB_PRE_TAG):])
                etree.SubElement(target, "text").text = verbatim
                continue

            line = raw_line.strip(' \t\n\r\u3000')
            if not line:
                continue

            if line.startswith(CALIBRE_SNB_IMG_TAG):
                folder = ProcessFileName(os.path.dirname(self.item.href))
                image = line[len(CALIBRE_SNB_IMG_TAG):]
                if folder != '':
                    image = folder + '_' + image
                etree.SubElement(target, "img").text = image
            elif line.startswith(CALIBRE_SNB_BM_TAG):
                bookmark = line[len(CALIBRE_SNB_BM_TAG):]
                target = trees[bookmark].find(".//body")
            else:
                if self.opts and not self.opts.snb_dont_indent_first_line:
                    indent = '\u3000\u3000'
                else:
                    indent = ''
                paragraph = etree.CDATA(unicode_type(indent + line))
                etree.SubElement(target, "text").text = paragraph

            # Optional blank "text" element after every non-empty line.
            if self.opts and self.opts.snb_insert_empty_line:
                blank = etree.CDATA('')
                etree.SubElement(target, "text").text = blank

        return trees
Esempio n. 5
0
 def mlize_spine(self, oeb_book):
     """Convert every spine item of *oeb_book* and join the results.

     For each item the ids and links are rewritten in place, then the
     ``<body>`` is handed to ``self.dump_text``.  Items are separated by a
     blank line.

     Returns:
         The concatenated text as one string.
     """
     chunks = ['']
     for item in oeb_book.spine:
         self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
         self.rewrite_ids(item.data, item)
         link_rewriter = partial(self.rewrite_link, page=item)
         rewrite_links(item.data, link_rewriter)
         stylizer = Stylizer(item.data, item.href, oeb_book, self.opts, self.opts.output_profile)
         body = item.data.find(XHTML('body'))
         chunks.extend(self.dump_text(body, stylizer))
         chunks.append('\n\n')
     return ''.join(chunks)
Esempio n. 6
0
 def mangle_spine(self):
     """Add the case-mangling stylesheet and process every spine item.

     A manifest entry holding ``CASE_MANGLER_CSS`` is created, each spine
     document gets a ``<link rel="stylesheet">`` to it appended to its
     ``<head>``, and the document ``<body>`` is passed to
     ``self.mangle_elem`` together with a freshly built Stylizer.
     """
     css_id, css_href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
     self.oeb.manifest.add(css_id, css_href, CSS_MIME, data=CASE_MANGLER_CSS)
     for item in self.oeb.spine:
         root = item.data
         # href of the stylesheet relative to this document
         link_target = item.relhref(css_href)
         head = root.find(XHTML('head'))
         etree.SubElement(head, XHTML('link'),
                          rel='stylesheet', href=link_target, type=CSS_MIME)
         # The Stylizer is built after the link has been inserted.
         stylizer = Stylizer(root, item.href, self.oeb, self.opts, self.profile)
         self.mangle_elem(root.find(XHTML('body')), stylizer)
Esempio n. 7
0
    def get_text(self):
        """Convert every spine item and return the joined markup.

        For each item a page anchor (``self.add_page_anchor``) is emitted,
        followed by the output of ``self.dump_text`` for its ``<body>``.

        Returns:
            The concatenated markup as one string.
        """
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML

        pieces = [u'']
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to RocketBook HTML...' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            pieces.append(self.add_page_anchor(item))
            body = item.data.find(XHTML('body'))
            pieces.extend(self.dump_text(body, stylizer, item))
        return ''.join(pieces)
Esempio n. 8
0
 def mobimlize_spine(self):
     """Iterate over the spine and convert it to MOBIML.

     Every spine item's ``data`` is replaced by a new ``html``/``body``
     tree that ``self.mobimlize_elem`` fills from the original ``<body>``.
     ``self.current_spine_item`` is set to the item being converted.
     """
     for item in self.oeb.spine:
         stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile)
         source_body = item.data.find(XHTML('body'))

         # Fresh output document that receives the converted content.
         new_root = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
         new_body = etree.SubElement(new_root, XHTML('body'))

         self.current_spine_item = item
         block_state = BlockState(new_body)
         format_stack = [FormatState()]
         self.mobimlize_elem(source_body, stylizer, block_state, format_stack)
         item.data = new_root
Esempio n. 9
0
    def get_text(self):
        """Convert every spine item and return the joined markup.

        Each item is serialized, run through ``self.prepare_text`` and
        parsed again; the Stylizer and ``self.dump_text`` work on that
        re-parsed tree rather than on ``item.data``.  A page anchor from
        ``self.add_page_anchor`` precedes each item's output.

        Returns:
            The concatenated markup as one string.
        """
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML

        pieces = ['']
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to PML markup...' % item.href)
            serialized = etree.tostring(item.data, encoding='unicode')
            tree = safe_xml_fromstring(self.prepare_text(serialized))
            stylizer = Stylizer(tree, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            pieces.append(self.add_page_anchor(item))
            pieces.extend(self.dump_text(tree.find(XHTML('body')), stylizer, item))
        return ''.join(pieces)
Esempio n. 10
0
 def __init__(self, root, item, oeb, opts, map=HTML_MAP):
     """Serialize *root* immediately and keep the results on the instance.

     root -- element passed to ``self.tree_to_binary``
     item -- object whose ``href`` is given to the Stylizer
     oeb  -- provides ``logger`` and ``manifest``
     opts -- forwarded to the Stylizer
     map  -- pair ``(tags, tattrs)``; a Stylizer and the ``ahc``/``aht``
             tables are only built when this is ``HTML_MAP`` itself

     After construction ``self.content`` holds the buffer contents.
     """
     html_mode = map is HTML_MAP

     self.item = item
     self.logger = oeb.logger
     self.manifest = oeb.manifest
     self.tags, self.tattrs = map
     self.buf = StringIO()
     self.anchors = []
     self.page_breaks = []
     self.is_html = html_mode
     if html_mode:
         self.stylizer = Stylizer(root, item.href, oeb, opts)
     else:
         self.stylizer = None

     # Everything above must be in place before the tree is walked.
     self.tree_to_binary(root)
     self.content = self.buf.getvalue()

     if html_mode:
         self.ahc = self.build_ahc()
         self.aht = self.build_aht()
     else:
         self.ahc = None
         self.aht = None
Esempio n. 11
0
 def mlize_spine(self, oeb_book):
     """Convert every spine item of *oeb_book* into one HTML document.

     For each item the ids and links are rewritten in place, then the
     ``<body>`` is handed to ``self.dump_text``; items are separated by a
     blank line.  The result is wrapped in a minimal HTML skeleton whose
     ``<head>`` declares UTF-8 content.

     Returns:
         The complete document as one string.
     """
     # The <head> must precede <body>; nesting it inside <body> yields
     # malformed HTML.
     output = [
         u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head><body>'
     ]
     for item in oeb_book.spine:
         self.log.debug('Converting %s to HTML...' % item.href)
         self.rewrite_ids(item.data, item)
         rewrite_links(item.data, partial(self.rewrite_link, page=item))
         stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
         output += self.dump_text(item.data.find(XHTML('body')), stylizer,
                                  item)
         output.append('\n\n')
     output.append('</body></html>')
     return ''.join(output)
Esempio n. 12
0
    def mlize_spine(self):
        """Convert the title page (if any) and every spine item to RTF.

        The output starts with ``self.header()``.  A guide ``titlepage``
        that is not part of the spine is converted first.  Every converted
        page is followed by a page-break group.  After ``self.footer()`` is
        appended the result goes through ``self.insert_images`` and
        ``self.clean_text``.

        Returns:
            The value returned by ``self.clean_text``.
        """
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.utils.xml_parse import safe_xml_fromstring

        page_break = r'{\page }'
        rtf = self.header()

        guide = self.oeb_book.guide
        if 'titlepage' in guide:
            title_item = self.oeb_book.manifest.hrefs[guide['titlepage'].href]
            # Items in the spine are handled by the loop below.
            if title_item.spine_position is None:
                stylizer = Stylizer(title_item.data, title_item.href,
                                    self.oeb_book, self.opts,
                                    self.opts.output_profile)
                self.currently_dumping_item = title_item
                rtf += self.dump_text(title_item.data.find(XHTML('body')),
                                      stylizer)
                rtf += page_break

        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to RTF markup...' % item.href)
            markup = etree.tostring(item.data, encoding='unicode')
            # Removing comments is needed as comments with -- inside them
            # can cause fromstring() to fail
            markup = re.sub('<!--.*?-->', '', markup, flags=re.DOTALL)
            markup = self.remove_tabs(self.remove_newlines(markup))
            tree = safe_xml_fromstring(markup)
            stylizer = Stylizer(tree, item.href, self.oeb_book, self.opts,
                                self.opts.output_profile)
            self.currently_dumping_item = item
            rtf += self.dump_text(tree.find(XHTML('body')), stylizer)
            rtf += page_break

        rtf += self.footer()
        return self.clean_text(self.insert_images(rtf))
Esempio n. 13
0
 def get_cover_page(self):
     """Return markup for the cover image and the title page.

     An ``<IMG>`` tag is emitted when the guide has a ``cover`` entry
     whose href has a truthy entry in ``self.name_map``.  A guide
     ``titlepage`` that is not part of the spine is converted with
     ``self.dump_text`` and appended.

     Returns:
         The markup as a string; empty when neither applies.
     """
     from calibre.ebooks.oeb.stylizer import Stylizer
     from calibre.ebooks.oeb.base import XHTML

     markup = u''
     guide = self.oeb_book.guide

     if 'cover' in guide:
         cover_name = self.name_map.get(guide['cover'].href, None)
         if cover_name:
             markup += '<IMG SRC="%s">' % cover_name

     if 'titlepage' in guide:
         self.log.debug('Generating cover page...')
         title_item = self.oeb_book.manifest.hrefs[guide['titlepage'].href]
         # A title page inside the spine is not converted here.
         if title_item.spine_position is None:
             stylizer = Stylizer(title_item.data, title_item.href,
                                 self.oeb_book, self.opts,
                                 self.opts.output_profile)
             body = title_item.data.find(XHTML('body'))
             markup += ''.join(self.dump_text(body, stylizer, title_item))

     return markup
Esempio n. 14
0
    def get_cover_page(self):
        """Return markup for the cover image and the title page.

        When the guide has a ``cover`` entry, a ``\\m="cover.png"`` line is
        emitted and the cover href is registered in ``self.image_hrefs``
        under the name ``cover.png``.  A guide ``titlepage`` that is not
        part of the spine is converted with ``self.dump_text`` and appended.

        Returns:
            The markup as a string; empty when neither applies.
        """
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML

        markup = ''
        guide = self.oeb_book.guide

        if 'cover' in guide:
            markup += '\\m="cover.png"\n'
            self.image_hrefs[guide['cover'].href] = 'cover.png'

        if 'titlepage' in guide:
            self.log.debug('Generating title page...')
            title_item = self.oeb_book.manifest.hrefs[guide['titlepage'].href]
            # A title page inside the spine is not converted here.
            if title_item.spine_position is None:
                stylizer = Stylizer(title_item.data, title_item.href,
                                    self.oeb_book, self.opts,
                                    self.opts.output_profile)
                body = title_item.data.find(XHTML('body'))
                markup += ''.join(self.dump_text(body, stylizer, title_item))

        return markup
Esempio n. 15
0
 def mlize_spine(self, oeb_book):
     """Convert every spine item of *oeb_book* into one HTML document.

     For each item the ids and links are rewritten in place, then the
     ``<body>`` is handed to ``self.dump_text``; items are separated by a
     blank line.  The ``<head>`` carries either a link to ``style.css``
     (when ``self.opts.htmlz_class_style`` is ``'external'``) or an inline
     ``<style>`` built from ``self.get_css``, plus the escaped book title.

     Returns:
         The complete document as one string.
     """
     body_parts = []
     for item in oeb_book.spine:
         self.log.debug('Converting %s to HTML...' % item.href)
         self.rewrite_ids(item.data, item)
         link_rewriter = partial(self.rewrite_link, page=item)
         rewrite_links(item.data, link_rewriter)
         stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
         body = item.data.find(XHTML('body'))
         body_parts.extend(self.dump_text(body, stylizer, item))
         body_parts.append('\n\n')

     external = self.opts.htmlz_class_style == 'external'
     css = ('<link href="style.css" rel="stylesheet" type="text/css" />'
            if external else
            '<style type="text/css">' + self.get_css(oeb_book) + '</style>')
     title = '<title>%s</title>' % prepare_string_for_xml(self.book_title)

     document = [
         '<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />',
         css,
         title,
         '</head><body>',
     ]
     document.extend(body_parts)
     document.append('</body></html>')
     return ''.join(document)
Esempio n. 16
0
    def mlize_spine(self):
        """Convert the table of contents and every spine item to text.

        Each item is serialized, stripped of newlines with
        ``self.remove_newlines`` and parsed again; the Stylizer and
        ``self.dump_text`` work on that re-parsed tree.  Items are separated
        by six newlines.  Trailing whitespace is removed from every line and
        the result is passed through ``self.cleanup_text``.

        Side effect: ``--`` inside comments of ``item.data`` is replaced by
        ``__`` so that the serialized markup can be parsed again.

        Returns:
            The value returned by ``self.cleanup_text``.
        """
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        output = [u'']
        output.append(self.get_toc())
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to TXT...' % item.href)
            # A comment containing "--" is serialized verbatim but is not
            # well-formed XML, so fromstring() below would fail on it.
            for comment in item.data.iterdescendants(etree.Comment):
                if comment.text and '--' in comment.text:
                    comment.text = comment.text.replace('--', '__')
            content = unicode(etree.tostring(item.data, encoding=unicode))
            content = self.remove_newlines(content)
            content = etree.fromstring(content)
            stylizer = Stylizer(content, item.href, self.oeb_book, self.opts,
                                self.opts.output_profile)
            output += self.dump_text(content.find(XHTML('body')), stylizer,
                                     item)
            # Separator between items (same joined text as extending the
            # list character by character).
            output.append(u'\n' * 6)
        output = u''.join(output)
        output = u'\n'.join(l.rstrip() for l in output.splitlines())
        output = self.cleanup_text(output)

        return output
Esempio n. 17
0
    def mlize_spine(self):
        """Convert the table of contents and every spine item to text.

        Each item is serialized, stripped of newlines with
        ``self.remove_newlines`` and parsed again; the Stylizer and
        ``self.dump_text`` work on that re-parsed tree.  Items are separated
        by six newlines.  Trailing whitespace is removed from every line and
        the result is passed through ``self.cleanup_text``.

        Side effect: ``--`` inside comments of ``item.data`` is replaced by
        ``__`` before serialization.

        Returns:
            The value returned by ``self.cleanup_text``.
        """
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.utils.xml_parse import safe_xml_fromstring

        pieces = [u'', self.get_toc()]
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to TXT...' % item.href)

            # Neutralise double hyphens inside comments before the tree is
            # serialized and parsed again.
            for comment in item.data.iterdescendants(etree.Comment):
                comment_text = comment.text
                if comment_text and '--' in comment_text:
                    comment.text = comment_text.replace('--', '__')

            markup = etree.tostring(item.data, encoding='unicode')
            tree = safe_xml_fromstring(self.remove_newlines(markup))
            stylizer = Stylizer(tree, item.href, self.oeb_book, self.opts,
                                self.opts.output_profile)
            pieces.extend(
                self.dump_text(tree.find(XHTML('body')), stylizer, item))
            pieces.append('\n' * 6)

        joined = ''.join(pieces)
        trimmed = '\n'.join([line.rstrip() for line in joined.splitlines()])
        return self.cleanup_text(trimmed)
Esempio n. 18
0
class ReBinary(object):
    """Serialize an element tree into a tokenised binary buffer.

    Tags and attributes found in the supplied ``map`` are written as their
    numeric codes; anything else is written as a ``FLAG_CUSTOM`` entry
    followed by its length and name.  While walking the tree the instance
    also records anchors (``id``/``name`` attributes) and page-break
    positions as offsets into the buffer.
    """
    # Reverse namespace map (namespace URI -> prefix) used at the root.
    NSRMAP = {'': None, XML_NS: 'xml'}

    def __init__(self, root, item, oeb, opts, map=HTML_MAP):
        """Serialize *root* immediately and keep the results on the instance.

        root -- element passed to ``tree_to_binary``
        item -- object whose ``href`` is given to the Stylizer; also used
                to resolve relative links when truthy
        oeb  -- provides ``logger`` and ``manifest``
        opts -- forwarded to the Stylizer
        map  -- pair ``(tags, tattrs)``; a Stylizer and the ``ahc``/``aht``
                tables are only built when this is ``HTML_MAP`` itself
        """
        self.item = item
        self.logger = oeb.logger
        self.manifest = oeb.manifest
        self.tags, self.tattrs = map
        self.buf = StringIO()
        self.anchors = []
        self.page_breaks = []
        self.is_html = is_html = map is HTML_MAP
        self.stylizer = Stylizer(root, item.href, oeb,
                                 opts) if is_html else None
        self.tree_to_binary(root)
        self.content = self.buf.getvalue()
        self.ahc = self.build_ahc() if is_html else None
        self.aht = self.build_aht() if is_html else None

    def write(self, *values):
        """Append each value to the buffer, UTF-8 encoded.

        Integers are first converted to the character with that code
        point; on ``OverflowError`` a warning is logged and ``?`` is
        written instead.
        """
        for value in values:
            if isinstance(value, (int, long)):
                try:
                    value = unichr(value)
                except OverflowError:
                    self.logger.warn('Unicode overflow for integer:', value)
                    value = u'?'
            self.buf.write(value.encode('utf-8'))

    def is_block(self, style):
        """Return True unless the style's ``display`` is inline(-block)."""
        return style['display'] not in ('inline', 'inline-block')

    def tree_to_binary(self,
                       elem,
                       nsrmap=NSRMAP,
                       parents=None,
                       inhead=False,
                       preserve=False):
        """Recursively write *elem* and its descendants to the buffer.

        elem     -- element to serialize; nodes whose tag is not a string
                    (comments, entities) are skipped
        nsrmap   -- namespace URI -> prefix map inherited from the parent
        parents  -- list of buffer offsets of the enclosing elements; a new
                    list is created when omitted
        inhead   -- True once a ``head`` element has been entered
        preserve -- True when whitespace must be kept as-is
        """
        if parents is None:
            # A fresh list per top-level call: a shared default list would
            # keep stale offsets if an exception interrupted a walk.
            parents = []
        if not isinstance(elem.tag, basestring):
            # Don't emit any comments or raw entities
            return
        nsrmap = copy.copy(nsrmap)
        attrib = dict(elem.attrib)
        style = self.stylizer.style(elem) if self.stylizer else None
        # Declare namespaces that are new or rebound at this element.
        for key, value in elem.nsmap.items():
            if value not in nsrmap or nsrmap[value] != key:
                xmlns = ('xmlns:' + key) if key else 'xmlns'
                attrib[xmlns] = value
            nsrmap[value] = key
        tag = prefixname(elem.tag, nsrmap)
        tag_offset = self.buf.tell()
        if tag == 'head':
            inhead = True
        flags = FLAG_OPENING
        if not elem.text and len(elem) == 0:
            # Empty element: opening and closing in one entry.
            flags |= FLAG_CLOSING
        if inhead:
            flags |= FLAG_HEAD
        if style and self.is_block(style):
            flags |= FLAG_BLOCK
        # Element header: a zero followed by the flag bits.
        self.write(0, flags)
        tattrs = self.tattrs[0]
        if tag in self.tags:
            index = self.tags[tag]
            self.write(index)
            # Use the tag-specific attribute table when there is one.
            if self.tattrs[index]:
                tattrs = self.tattrs[index]
        else:
            self.write(FLAG_CUSTOM, len(tag) + 1, tag)
        last_break = self.page_breaks[-1][0] if self.page_breaks else None
        # Avoid recording two breaks at the same offset.
        if style and last_break != tag_offset \
           and style['page-break-before'] in PAGE_BREAKS:
            self.page_breaks.append((tag_offset, list(parents)))
        for attr, value in attrib.items():
            attr = prefixname(attr, nsrmap)
            if attr in ('href', 'src'):
                value = urlnormalize(value)
                path, frag = urldefrag(value)
                if self.item:
                    path = self.item.abshref(path)
                # Prefix 2: target is a manifest item (value becomes its
                # id, plus fragment); prefix 3: anything else.
                prefix = unichr(3)
                if path in self.manifest.hrefs:
                    prefix = unichr(2)
                    value = self.manifest.hrefs[path].id
                    if frag:
                        value = '#'.join((value, frag))
                value = prefix + value
            elif attr in ('id', 'name'):
                self.anchors.append((value, tag_offset))
            elif attr.startswith('ms--'):
                attr = '%' + attr[4:]
            elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
                value = CSS_MIME
            if attr in tattrs:
                self.write(tattrs[attr])
            else:
                self.write(FLAG_CUSTOM, len(attr) + 1, attr)
            # Integer-valued attributes are written as ATTR_NUMBER and
            # value + 1; all others as length + 1 followed by the text.
            try:
                self.write(ATTR_NUMBER, int(value) + 1)
            except ValueError:
                self.write(len(value) + 1, value)
        # Zero terminates the attribute list.
        self.write(0)
        old_preserve = preserve
        if style:
            preserve = (style['white-space'] in ('pre', 'pre-wrap'))
        # An explicit xml:space attribute overrides the CSS setting.
        xml_space = elem.get(XML('space'))
        if xml_space == 'preserve':
            preserve = True
        elif xml_space == 'normal':
            preserve = False
        if elem.text:
            if preserve:
                self.write(elem.text)
            elif len(elem) == 0 or not elem.text.isspace():
                self.write(COLLAPSE.sub(' ', elem.text))
            # else: whitespace-only text before a child is dropped
        parents.append(tag_offset)
        child = cstyle = nstyle = None
        # Walk the children one step behind so that each child is handled
        # with knowledge of the style of the sibling that follows it.
        for nxt in chain(elem, [None]):
            if self.stylizer:
                nstyle = None if nxt is None else self.stylizer.style(nxt)
            if child is not None:
                # Drop whitespace-only tails next to block boundaries.
                if not preserve \
                   and (inhead or not nstyle or self.is_block(cstyle) or self.is_block(nstyle)) \
                   and child.tail and child.tail.isspace():
                    child.tail = None
                self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
            child, cstyle = nxt, nstyle
        parents.pop()
        # The tail belongs to the parent's whitespace context.
        preserve = old_preserve
        if not flags & FLAG_CLOSING:
            self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
        if elem.tail and tag != 'html':
            tail = elem.tail
            if not preserve:
                tail = COLLAPSE.sub(' ', tail)
            self.write(tail)
        if style and style['page-break-after'] not in ('avoid', 'auto'):
            self.page_breaks.append((self.buf.tell(), list(parents)))

    def build_ahc(self):
        """Pack the collected anchors into a byte string.

        Layout: anchor count, then for every anchor its length, its name
        and its buffer offset as a little-endian unsigned 32-bit integer.
        A warning is logged when there are more than six anchors.
        """
        if len(self.anchors) > 6:
            self.logger.warn("More than six anchors in file %r. "
                             "Some links may not work properly." %
                             self.item.href)
        data = StringIO()
        data.write(unichr(len(self.anchors)).encode('utf-8'))
        for anchor, offset in self.anchors:
            data.write(unichr(len(anchor)).encode('utf-8'))
            data.write(anchor)
            data.write(pack('<I', offset))
        return data.getvalue()

    def build_aht(self):
        """Return a packed little-endian unsigned 32-bit zero."""
        return pack('<I', 0)
Esempio n. 19
0
class ReBinary(object):
    """Serialize an element tree into a tokenised binary buffer.

    Tags and attributes found in the supplied ``map`` are written as their
    numeric codes; anything else is written as a ``FLAG_CUSTOM`` entry
    followed by its length and name.  While walking the tree the instance
    also records anchors (``id``/``name`` attributes) and page-break
    positions as offsets into the buffer.
    """
    # Reverse namespace map (namespace URI -> prefix) used at the root.
    NSRMAP = {'': None, XML_NS: 'xml'}

    def __init__(self, root, item, oeb, opts, map=HTML_MAP):
        """Serialize *root* immediately and keep the results on the instance.

        root -- element passed to ``tree_to_binary``
        item -- object whose ``href`` is given to the Stylizer; also used
                to resolve relative links when truthy
        oeb  -- provides ``logger`` and ``manifest``
        opts -- forwarded to the Stylizer
        map  -- pair ``(tags, tattrs)``; a Stylizer and the ``ahc``/``aht``
                tables are only built when this is ``HTML_MAP`` itself
        """
        self.item = item
        self.logger = oeb.logger
        self.manifest = oeb.manifest
        self.tags, self.tattrs = map
        self.buf = StringIO()
        self.anchors = []
        self.page_breaks = []
        self.is_html  = is_html = map is HTML_MAP
        self.stylizer = Stylizer(root, item.href, oeb, opts) if is_html else None
        self.tree_to_binary(root)
        self.content = self.buf.getvalue()
        self.ahc = self.build_ahc() if is_html else None
        self.aht = self.build_aht() if is_html else None

    def write(self, *values):
        """Append each value to the buffer, UTF-8 encoded.

        Integers are first converted to the character with that code
        point; on ``OverflowError`` a warning is logged and ``?`` is
        written instead.
        """
        for value in values:
            if isinstance(value, (int, long)):
                try:
                    value = unichr(value)
                except OverflowError:
                    self.logger.warn('Unicode overflow for integer:', value)
                    value = u'?'
            self.buf.write(value.encode('utf-8'))

    def is_block(self, style):
        """Return True unless the style's ``display`` is inline(-block)."""
        return style['display'] not in ('inline', 'inline-block')

    def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=None,
                       inhead=False, preserve=False):
        """Recursively write *elem* and its descendants to the buffer.

        elem     -- element to serialize; nodes whose tag is not a string
                    (comments, entities) are skipped
        nsrmap   -- namespace URI -> prefix map inherited from the parent
        parents  -- list of buffer offsets of the enclosing elements; a new
                    list is created when omitted
        inhead   -- True once a ``head`` element has been entered
        preserve -- True when whitespace must be kept as-is
        """
        if parents is None:
            # A fresh list per top-level call: a shared default list would
            # keep stale offsets if an exception interrupted a walk.
            parents = []
        if not isinstance(elem.tag, basestring):
            # Don't emit any comments or raw entities
            return
        nsrmap = copy.copy(nsrmap)
        attrib = dict(elem.attrib)
        style = self.stylizer.style(elem) if self.stylizer else None
        # Declare namespaces that are new or rebound at this element.
        for key, value in elem.nsmap.items():
            if value not in nsrmap or nsrmap[value] != key:
                xmlns = ('xmlns:' + key) if key else 'xmlns'
                attrib[xmlns] = value
            nsrmap[value] = key
        tag = prefixname(elem.tag, nsrmap)
        tag_offset = self.buf.tell()
        if tag == 'head':
            inhead = True
        flags = FLAG_OPENING
        if not elem.text and len(elem) == 0:
            # Empty element: opening and closing in one entry.
            flags |= FLAG_CLOSING
        if inhead:
            flags |= FLAG_HEAD
        if style and self.is_block(style):
            flags |= FLAG_BLOCK
        # Element header: a zero followed by the flag bits.
        self.write(0, flags)
        tattrs = self.tattrs[0]
        if tag in self.tags:
            index = self.tags[tag]
            self.write(index)
            # Use the tag-specific attribute table when there is one.
            if self.tattrs[index]:
                tattrs = self.tattrs[index]
        else:
            self.write(FLAG_CUSTOM, len(tag)+1, tag)
        last_break = self.page_breaks[-1][0] if self.page_breaks else None
        # Avoid recording two breaks at the same offset.
        if style and last_break != tag_offset \
           and style['page-break-before'] in PAGE_BREAKS:
            self.page_breaks.append((tag_offset, list(parents)))
        for attr, value in attrib.items():
            attr = prefixname(attr, nsrmap)
            if attr in ('href', 'src'):
                value = urlnormalize(value)
                path, frag = urldefrag(value)
                if self.item:
                    path = self.item.abshref(path)
                # Prefix 2: target is a manifest item (value becomes its
                # id, plus fragment); prefix 3: anything else.
                prefix = unichr(3)
                if path in self.manifest.hrefs:
                    prefix = unichr(2)
                    value = self.manifest.hrefs[path].id
                    if frag:
                        value = '#'.join((value, frag))
                value = prefix + value
            elif attr in ('id', 'name'):
                self.anchors.append((value, tag_offset))
            elif attr.startswith('ms--'):
                attr = '%' + attr[4:]
            elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
                value = CSS_MIME
            if attr in tattrs:
                self.write(tattrs[attr])
            else:
                self.write(FLAG_CUSTOM, len(attr)+1, attr)
            # Integer-valued attributes are written as ATTR_NUMBER and
            # value + 1; all others as length + 1 followed by the text.
            try:
                self.write(ATTR_NUMBER, int(value)+1)
            except ValueError:
                self.write(len(value)+1, value)
        # Zero terminates the attribute list.
        self.write(0)
        old_preserve = preserve
        if style:
            preserve = (style['white-space'] in ('pre', 'pre-wrap'))
        # An explicit xml:space attribute overrides the CSS setting.
        xml_space = elem.get(XML('space'))
        if xml_space == 'preserve':
            preserve = True
        elif xml_space == 'normal':
            preserve = False
        if elem.text:
            if preserve:
                self.write(elem.text)
            elif len(elem) == 0 or not elem.text.isspace():
                self.write(COLLAPSE.sub(' ', elem.text))
            # else: whitespace-only text before a child is dropped
        parents.append(tag_offset)
        child = cstyle = nstyle = None
        # Walk the children one step behind so that each child is handled
        # with knowledge of the style of the sibling that follows it.
        for nxt in chain(elem, [None]):
            if self.stylizer:
                nstyle = None if nxt is None else self.stylizer.style(nxt)
            if child is not None:
                # Drop whitespace-only tails next to block boundaries.
                if not preserve \
                   and (inhead or not nstyle or self.is_block(cstyle) or self.is_block(nstyle)) \
                   and child.tail and child.tail.isspace():
                    child.tail = None
                self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
            child, cstyle = nxt, nstyle
        parents.pop()
        # The tail belongs to the parent's whitespace context.
        preserve = old_preserve
        if not flags & FLAG_CLOSING:
            self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
        if elem.tail and tag != 'html':
            tail = elem.tail
            if not preserve:
                tail = COLLAPSE.sub(' ', tail)
            self.write(tail)
        if style and style['page-break-after'] not in ('avoid', 'auto'):
            self.page_breaks.append((self.buf.tell(), list(parents)))

    def build_ahc(self):
        """Pack the collected anchors into a byte string.

        Layout: anchor count, then for every anchor its length, its name
        and its buffer offset as a little-endian unsigned 32-bit integer.
        A warning is logged when there are more than six anchors.
        """
        if len(self.anchors) > 6:
            self.logger.warn("More than six anchors in file %r. "
                "Some links may not work properly." % self.item.href)
        data = StringIO()
        data.write(unichr(len(self.anchors)).encode('utf-8'))
        for anchor, offset in self.anchors:
            data.write(unichr(len(anchor)).encode('utf-8'))
            data.write(anchor)
            data.write(pack('<I', offset))
        return data.getvalue()

    def build_aht(self):
        """Return a packed little-endian unsigned 32-bit zero."""
        return pack('<I', 0)