Python ListsManager Examples

Programming Language: Python

Namespace/Package Name: calibre.ebooks.docx.writer.lists

Class/Type: ListsManager

Examples at hotexamples.com: 8

Python ListsManager - 8 examples found. These are the top-rated real-world Python examples of calibre.ebooks.docx.writer.lists.ListsManager, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

ListsManager(2)

finalize(2)

serialize(2)

Example #1

Show file

    def __call__(self):
        '''
        Run the conversion: rasterize SVG, create the per-document managers,
        turn every spine item into blocks, delete the blocks that end up
        flagged as skipped, finalize lists and styles, then write the output.
        '''
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        rasterizer = self.svg_rasterizer = SVGRasterizer()
        rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(docx.namespace)
        self.images_manager = ImagesManager(self.oeb, docx.document_relationships)
        self.lists_manager = ListsManager(docx)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(docx.namespace, self.styles_manager)

        for spine_item in self.oeb.spine:
            self.process_item(spine_item)

        all_blocks = self.blocks.all_blocks
        skipped_positions = []
        for index, current in enumerate(all_blocks):
            try:
                following = all_blocks[index + 1]
            except IndexError:
                # The last block has no successor to be resolved against.
                break
            current.resolve_skipped(following)
            if current.skipped:
                skipped_positions.append(index)
        # Delete back to front, i.e. highest recorded position first.
        while skipped_positions:
            self.blocks.delete_block_at(skipped_positions.pop())

        for manager in (self.lists_manager, self.styles_manager):
            manager.finalize(all_blocks)
        self.write()

Example #2

Show file

File: from_html.py Project: smdx023/calibre

    def __call__(self):
        '''
        Run the conversion: rasterize SVG, create the per-document managers,
        convert every spine item into blocks, optionally process ToC links
        and read the cover image, delete skipped blocks, post-process the
        remaining blocks, finalize lists and styles, then write the output.
        '''
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        rasterizer = self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(
            docx.namespace, self.log, self.mi.language)
        self.links_manager = LinksManager(
            docx.namespace, docx.document_relationships, self.log)
        self.images_manager = ImagesManager(
            self.oeb, docx.document_relationships, self.opts)
        self.lists_manager = ListsManager(docx)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(
            docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = self.current_lang = None

        for spine_item in self.oeb.spine:
            self.log.debug('Processing', spine_item.href)
            self.process_item(spine_item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        # Read the cover only when requested, declared in the metadata and
        # actually present in the manifest.
        if self.add_cover:
            cover_entries = self.oeb.metadata.cover
            if cover_entries:
                cover_id = str(cover_entries[0])
                manifest_ids = self.oeb.manifest.ids
                if cover_id in manifest_ids:
                    self.cover_img = self.images_manager.read_image(
                        manifest_ids[cover_id].href)

        all_blocks = self.blocks.all_blocks
        skipped_positions = []
        for index, current in enumerate(all_blocks):
            try:
                following = all_blocks[index + 1]
            except IndexError:
                # The last block has no successor to be resolved against.
                break
            current.resolve_skipped(following)
            if current.skipped:
                skipped_positions.append(index)
        # Delete back to front, i.e. highest recorded position first.
        while skipped_positions:
            self.blocks.delete_block_at(skipped_positions.pop())
        self.blocks.all_blocks[0].is_first_block = True
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        raw_cover = self.cover_img
        if raw_cover is not None:
            self.cover_img = self.images_manager.create_cover_markup(
                raw_cover, self.opts.preserve_cover_aspect_ratio,
                *page_size(self.opts))
        for manager in (self.lists_manager, self.styles_manager):
            manager.finalize(all_blocks)
        self.write()

Example #3

Show file

File: from_html.py Project: educhenm/calibre

    def __call__(self):
        '''
        Run the conversion: rasterize SVG, create the per-document managers,
        convert every spine item into blocks, delete the blocks flagged as
        skipped, mark the first remaining block, finalize lists and styles,
        then write the output.
        '''
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        rasterizer = self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(docx.namespace)
        self.links_manager = LinksManager(docx.namespace, docx.document_relationships)
        self.images_manager = ImagesManager(self.oeb, docx.document_relationships)
        self.lists_manager = ListsManager(docx)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = None

        for spine_item in self.oeb.spine:
            self.process_item(spine_item)

        all_blocks = self.blocks.all_blocks
        skipped_positions = []
        for index, current in enumerate(all_blocks):
            try:
                following = all_blocks[index+1]
            except IndexError:
                # The last block has no successor to be resolved against.
                break
            current.resolve_skipped(following)
            if current.skipped:
                skipped_positions.append(index)
        # Delete back to front, i.e. highest recorded position first.
        while skipped_positions:
            self.blocks.delete_block_at(skipped_positions.pop())
        self.blocks.all_blocks[0].is_first_block = True

        for manager in (self.lists_manager, self.styles_manager):
            manager.finalize(all_blocks)
        self.write()

Example #4

Show file

File: from_html.py Project: aimylios/calibre

    def __call__(self):
        '''
        Run the conversion: rasterize SVG, create the per-document managers,
        convert every spine item into blocks, optionally process ToC links
        and read the cover image, delete skipped blocks, post-process the
        remaining blocks, finalize lists and styles, then write the output.
        '''
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        rasterizer = self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(docx.namespace, self.log, self.mi.language)
        self.links_manager = LinksManager(docx.namespace, docx.document_relationships, self.log)
        self.images_manager = ImagesManager(self.oeb, docx.document_relationships, self.opts)
        self.lists_manager = ListsManager(docx)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = self.current_lang = None

        for spine_item in self.oeb.spine:
            self.log.debug('Processing', spine_item.href)
            self.process_item(spine_item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        # Read the cover only when requested, declared in the metadata and
        # actually present in the manifest.
        if self.add_cover:
            cover_entries = self.oeb.metadata.cover
            if cover_entries:
                cover_id = unicode(cover_entries[0])
                manifest_ids = self.oeb.manifest.ids
                if cover_id in manifest_ids:
                    self.cover_img = self.images_manager.read_image(manifest_ids[cover_id].href)

        all_blocks = self.blocks.all_blocks
        skipped_positions = []
        for index, current in enumerate(all_blocks):
            try:
                following = all_blocks[index+1]
            except IndexError:
                # The last block has no successor to be resolved against.
                break
            current.resolve_skipped(following)
            if current.skipped:
                skipped_positions.append(index)
        # Delete back to front, i.e. highest recorded position first.
        while skipped_positions:
            self.blocks.delete_block_at(skipped_positions.pop())
        self.blocks.all_blocks[0].is_first_block = True
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        raw_cover = self.cover_img
        if raw_cover is not None:
            self.cover_img = self.images_manager.create_cover_markup(
                raw_cover, self.opts.preserve_cover_aspect_ratio, *page_size(self.opts))
        for manager in (self.lists_manager, self.styles_manager):
            manager.finalize(all_blocks)
        self.write()

Example #5

Show file

File: from_html.py Project: smdx023/calibre

class Convert:
    '''
    Callable converter: walks the spine of ``oeb``, feeds every HTML body
    through :meth:`process_tag` to populate ``self.blocks``, and finally
    serializes blocks, styles, images, fonts and lists into the parts of
    ``docx`` (see :meth:`write`).
    '''

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx, mi, add_cover, add_toc):
        '''
        Store the conversion inputs.

        :param oeb: The book to convert; its spine, metadata and manifest are
            read by :meth:`__call__`.
        :param docx: Output container; also supplies ``log`` and ``opts``.
        :param mi: Metadata object; ``mi.language`` is passed to the styles
            manager.
        :param add_cover: If true, try to read the cover image declared in
            the book's metadata.
        :param add_toc: If true, ask the links manager to process ToC links.
        '''
        self.oeb, self.docx, self.add_cover, self.add_toc = oeb, docx, add_cover, add_toc
        self.log, self.opts = docx.log, docx.opts
        self.mi = mi
        self.cover_img = None
        # Overwrite the output profile's page dimensions with the values
        # computed by page_effective_area() for the current options.
        p = self.opts.output_profile
        p.width_pts, p.height_pts = page_effective_area(self.opts)

    def __call__(self):
        '''
        Run the whole conversion: rasterize SVG, create the managers, convert
        every spine item into blocks, drop skipped blocks, post-process the
        remaining blocks, finalize lists and styles and write the output.
        '''
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager(self.docx.namespace, self.log,
                                            self.mi.language)
        self.links_manager = LinksManager(self.docx.namespace,
                                          self.docx.document_relationships,
                                          self.log)
        self.images_manager = ImagesManager(self.oeb,
                                            self.docx.document_relationships,
                                            self.opts)
        self.lists_manager = ListsManager(self.docx)
        self.fonts_manager = FontsManager(self.docx.namespace, self.oeb,
                                          self.opts)
        self.blocks = Blocks(self.docx.namespace, self.styles_manager,
                             self.links_manager)
        # Link and language currently in effect while walking the HTML tree;
        # saved and restored around every tag by process_tag().
        self.current_link = self.current_lang = None

        for item in self.oeb.spine:
            self.log.debug('Processing', item.href)
            self.process_item(item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        # Read the cover only if it was requested, is declared in the
        # metadata and its id is present in the manifest.
        if self.add_cover and self.oeb.metadata.cover and str(
                self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
            cover_id = str(self.oeb.metadata.cover[0])
            item = self.oeb.manifest.ids[cover_id]
            self.cover_img = self.images_manager.read_image(item.href)

        all_blocks = self.blocks.all_blocks
        remove_blocks = []
        # Resolve each block against its successor; the last block has no
        # successor, which is what the IndexError/break handles.
        for i, block in enumerate(all_blocks):
            try:
                nb = all_blocks[i + 1]
            except IndexError:
                break
            block.resolve_skipped(nb)
            if block.skipped:
                remove_blocks.append((i, block))
        # Delete in reverse order, i.e. highest recorded position first.
        for pos, block in reversed(remove_blocks):
            self.blocks.delete_block_at(pos)
        # NOTE(review): raises IndexError if no blocks remain -- confirm that
        # an empty document cannot reach this point.
        self.blocks.all_blocks[0].is_first_block = True
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        if self.cover_img is not None:
            # Replace the value returned by read_image() with cover markup.
            self.cover_img = self.images_manager.create_cover_markup(
                self.cover_img, self.opts.preserve_cover_aspect_ratio,
                *page_size(self.opts))
        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def process_item(self, item):
        '''
        Convert one spine item: pick (or build) its stylizer, record the
        item's ``abshref`` and language, then process every ``h:body`` element
        found in ``item.data``.
        '''
        self.current_item = item
        # Reuse the stylizer cached by the SVG rasterizer when there is one.
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            stylizer = Stylizer(item.data,
                                item.href,
                                self.oeb,
                                self.opts,
                                profile=self.opts.output_profile,
                                base_css=self.base_css)
        self.abshref = self.images_manager.abshref = item.abshref

        # Fall back to the document language when the item declares none.
        self.current_lang = lang_for_tag(
            item.data) or self.styles_manager.document_lang
        for i, body in enumerate(XPath('//h:body')(item.data)):
            with self.blocks:
                self.blocks.top_bookmark = self.links_manager.bookmark_for_anchor(
                    self.links_manager.top_anchor, self.current_item, body)
                # Only the first body of the item is flagged as the first tag.
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self,
                    html_tag,
                    stylizer,
                    is_first_tag=False,
                    float_spec=None):
        '''
        Recursively convert ``html_tag``, its children and its tail text.

        :param html_tag: The element to convert.
        :param stylizer: Provides the computed style via ``stylizer.style()``.
        :param is_first_tag: When true the tag is never treated as floating
            and its tail text is not converted.
        :param float_spec: Float specification handed down from an enclosing
            floating tag, or None.

        The contents of script/style/title/meta tags and of hidden tags are
        not converted, but their tail text still is.
        '''
        tagname = barename(html_tag.tag)
        tag_style = stylizer.style(html_tag)
        ignore_tag_contents = tagname in {'script', 'style', 'title', 'meta'
                                          } or tag_style.is_hidden
        display = tag_style._get('display')
        is_block = False

        if not ignore_tag_contents:
            # Remember the link and language in effect so they can be
            # restored once this tag and its children have been processed.
            previous_link = self.current_link
            if tagname == 'a' and html_tag.get('href'):
                self.current_link = (self.current_item, html_tag.get('href'),
                                     html_tag.get('title'))
            previous_lang = self.current_lang
            tag_lang = lang_for_tag(html_tag)
            if tag_lang:
                self.current_lang = tag_lang

            is_float = tag_style['float'] in {'left', 'right'
                                              } and not is_first_tag
            # Only create a float spec if none was inherited from a parent.
            if float_spec is None and is_float:
                float_spec = FloatSpec(self.docx.namespace, html_tag,
                                       tag_style)

            if display in {
                    'inline', 'inline-block'
            } or tagname == 'br':  # <br> has display:block but we don't want to start a new paragraph
                if is_float and float_spec.is_dropcaps:
                    self.add_block_tag(tagname,
                                       html_tag,
                                       tag_style,
                                       stylizer,
                                       float_spec=float_spec)
                    # The float spec has been used by the block just created,
                    # so it is not handed down to the children.
                    float_spec = None
                else:
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            elif display == 'list-item':
                self.add_block_tag(tagname,
                                   html_tag,
                                   tag_style,
                                   stylizer,
                                   is_list_item=True)
            elif display.startswith('table') or display == 'inline-table':
                # Other table-* display values create no block or table
                # structure of their own here.
                if display == 'table-cell':
                    self.blocks.start_new_cell(html_tag, tag_style)
                    self.add_block_tag(tagname,
                                       html_tag,
                                       tag_style,
                                       stylizer,
                                       is_table_cell=True)
                elif display == 'table-row':
                    self.blocks.start_new_row(html_tag, tag_style)
                elif display in {'table', 'inline-table'}:
                    self.blocks.end_current_block()
                    self.blocks.start_new_table(html_tag, tag_style)
            else:
                if tagname == 'img' and is_float:
                    # Image is floating so don't start a new paragraph for it
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
                else:
                    if tagname == 'hr':
                        # Suppress every border of an <hr> except the top one.
                        for edge in 'right bottom left'.split():
                            tag_style.set('border-%s-style' % edge, 'none')
                    self.add_block_tag(tagname,
                                       html_tag,
                                       tag_style,
                                       stylizer,
                                       float_spec=float_spec)

            for child in html_tag.iterchildren():
                if isinstance(getattr(child, 'tag', None), string_or_bytes):
                    self.process_tag(child, stylizer, float_spec=float_spec)
                else:  # Comment/PI/etc.
                    # The node itself is not converted, but the text that
                    # follows it is added using this tag's style.
                    tail = getattr(child, 'tail', None)
                    if tail:
                        block = self.create_block_from_parent(
                            html_tag, stylizer)
                        block.add_text(tail,
                                       tag_style,
                                       is_parent_style=False,
                                       link=self.current_link,
                                       lang=self.current_lang)

            # Captured before finish_tag() is called (presumably finish_tag()
            # closes the block opened for this tag -- confirm in Blocks).
            is_block = html_tag in self.blocks.open_html_blocks
            self.blocks.finish_tag(html_tag)
            if is_block and tag_style['page-break-after'] == 'avoid':
                self.blocks.all_blocks[-1].keep_next = True

            self.current_link = previous_link
            self.current_lang = previous_lang

        # Now, process the tail if any

        if display == 'table-row':
            return  # We ignore the tail for these tags

        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (
                not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.create_block_from_parent(html_tag, stylizer)
            block.add_text(html_tag.tail,
                           stylizer.style(html_tag.getparent()),
                           is_parent_style=True,
                           link=self.current_link,
                           lang=self.current_lang)

    def create_block_from_parent(self, html_tag, stylizer):
        '''
        Return the block obtained from ``current_or_new_block()`` for the
        parent of ``html_tag`` (styled with the parent's style), with its
        ``page_break_before`` flag cleared.
        '''
        parent = html_tag.getparent()
        block = self.blocks.current_or_new_block(parent,
                                                 stylizer.style(parent))
        # Do not inherit page-break-before from parent
        block.page_break_before = False
        return block

    def add_block_tag(self,
                      tagname,
                      html_tag,
                      tag_style,
                      stylizer,
                      is_table_cell=False,
                      float_spec=None,
                      is_list_item=False):
        '''
        Start a new block for ``html_tag`` and fill it with the tag's own
        content: a bookmark for its ``id``/``name`` anchor, then either the
        image (for ``img``) or the tag's leading text.

        ``is_table_cell``, ``float_spec`` and ``is_list_item`` are passed
        through to ``start_new_block()``.
        '''
        block = self.blocks.start_new_block(html_tag,
                                            tag_style,
                                            is_table_cell=is_table_cell,
                                            float_spec=float_spec,
                                            is_list_item=is_list_item)
        anchor = html_tag.get('id') or html_tag.get('name')
        if anchor:
            block.bookmarks.add(self.bookmark_for_anchor(anchor, html_tag))
        if tagname == 'img':
            self.images_manager.add_image(html_tag,
                                          block,
                                          stylizer,
                                          as_block=True)
        else:
            text = html_tag.text
            # NOTE: rebinds the is_list_item parameter; from here on it means
            # "the tag is literally an <li>", not the value that was passed in.
            is_list_item = tagname == 'li'
            # True when the first child of the <li> is a non-empty ul/ol.
            has_sublist = is_list_item and len(html_tag) and barename(
                html_tag[0].tag) in ('ul', 'ol') and len(html_tag[0])
            if text and has_sublist and not text.strip():
                text = ''  # whitespace only, ignore
            if text:
                block.add_text(text,
                               tag_style,
                               ignore_leading_whitespace=True,
                               is_parent_style=True,
                               link=self.current_link,
                               lang=self.current_lang)
            elif has_sublist:
                # No text of its own, only a sub-list: flag the block so it
                # is not treated as empty.
                block.force_not_empty = True

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        '''
        Add the own content of an inline tag to the block of its parent:
        a break for ``br``, the image for ``img``, otherwise the tag's
        leading text. A bookmark is attached when the tag has an
        ``id``/``name`` anchor.
        '''
        anchor = html_tag.get('id') or html_tag.get('name') or None
        bmark = None
        if anchor:
            bmark = self.bookmark_for_anchor(anchor, html_tag)
        if tagname == 'br':
            # Skip a <br> that is the last element child of its parent and
            # has no tail text.
            if html_tag.tail or html_tag is not tuple(
                    html_tag.getparent().iterchildren('*'))[-1]:
                block = self.create_block_from_parent(html_tag, stylizer)
                # Translate the CSS clear value; anything else maps to 'none'.
                block.add_break(clear={
                    'both': 'all',
                    'left': 'left',
                    'right': 'right'
                }.get(tag_style['clear'], 'none'),
                                bookmark=bmark)
        elif tagname == 'img':
            block = self.create_block_from_parent(html_tag, stylizer)
            self.images_manager.add_image(html_tag,
                                          block,
                                          stylizer,
                                          bookmark=bmark)
        else:
            if html_tag.text:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text(html_tag.text,
                               tag_style,
                               is_parent_style=False,
                               bookmark=bmark,
                               link=self.current_link,
                               lang=self.current_lang)
            elif bmark:
                # No text, but the anchor still needs a bookmark, so add an
                # empty text run to carry it.
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text('',
                               tag_style,
                               is_parent_style=False,
                               bookmark=bmark,
                               link=self.current_link,
                               lang=self.current_lang)

    def bookmark_for_anchor(self, anchor, html_tag):
        '''
        Return the links manager's bookmark for ``anchor`` on ``html_tag``
        within the item currently being processed.
        '''
        return self.links_manager.bookmark_for_anchor(anchor,
                                                      self.current_item,
                                                      html_tag)

    def write(self):
        '''
        Create the document skeleton and serialize blocks, the optional ToC
        and cover, styles, images, fonts and list numbering into ``self.docx``.
        '''
        self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
        self.blocks.serialize(body)
        body.append(body[0])  # Move <sectPr> to the end
        if self.links_manager.toc:
            self.links_manager.serialize_toc(
                body, self.styles_manager.primary_heading_style)
        if self.cover_img is not None:
            self.images_manager.write_cover_block(body, self.cover_img)
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles,
                                     self.docx.font_table,
                                     self.docx.embedded_fonts, self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)

Example #6

Show file

File: from_html.py Project: educhenm/calibre

class Convert(object):
    '''
    Callable converter: walks the spine of ``oeb``, feeds every HTML body
    through :meth:`process_tag` to populate ``self.blocks``, and finally
    serializes blocks, styles, images, fonts and lists into the parts of
    ``docx`` (see :meth:`write`).
    '''

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx):
        '''
        :param oeb: The book to convert; its spine is read by ``__call__``.
        :param docx: Output container; also supplies ``log`` and ``opts``.
        '''
        self.oeb, self.docx = oeb, docx
        self.log, self.opts = docx.log, docx.opts

    def __call__(self):
        '''
        Run the whole conversion: rasterize SVG, create the managers, convert
        every spine item into blocks, drop skipped blocks, finalize lists and
        styles and write the output.
        '''
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager(self.docx.namespace)
        self.links_manager = LinksManager(self.docx.namespace, self.docx.document_relationships)
        self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships)
        self.lists_manager = ListsManager(self.docx)
        self.fonts_manager = FontsManager(self.docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(self.docx.namespace, self.styles_manager, self.links_manager)
        # Link currently in effect while walking the HTML tree; saved and
        # restored around every tag by process_tag().
        self.current_link = None

        for item in self.oeb.spine:
            self.process_item(item)

        all_blocks = self.blocks.all_blocks
        remove_blocks = []
        # Resolve each block against its successor; the last block has no
        # successor, which is what the IndexError/break handles.
        for i, block in enumerate(all_blocks):
            try:
                nb = all_blocks[i+1]
            except IndexError:
                break
            block.resolve_skipped(nb)
            if block.skipped:
                remove_blocks.append((i, block))
        # Delete in reverse order, i.e. highest recorded position first.
        for pos, block in reversed(remove_blocks):
            self.blocks.delete_block_at(pos)
        self.blocks.all_blocks[0].is_first_block = True

        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def process_item(self, item):
        '''
        Convert one spine item: pick (or build) its stylizer, record the
        item's ``abshref``, then process every ``h:body`` element found in
        ``item.data``.
        '''
        self.current_item = item
        # Reuse the stylizer cached by the SVG rasterizer when there is one.
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile, base_css=self.base_css)
        self.abshref = self.images_manager.abshref = item.abshref

        for i, body in enumerate(XPath('//h:body')(item.data)):
            with self.blocks:
                # Give the body an id (the links manager's top anchor) unless
                # it already has one.
                body.set('id', body.get('id', None) or self.links_manager.top_anchor)
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
        '''
        Recursively convert ``html_tag``, its children and its tail text.

        :param html_tag: The element to convert.
        :param stylizer: Provides the computed style via ``stylizer.style()``.
        :param is_first_tag: When true the tag is never treated as floating
            and its tail text is not converted.
        :param float_spec: Float specification handed down from an enclosing
            floating tag, or None.

        The contents of script/style/title/meta tags and of hidden tags are
        not converted. Their tail text, and the tail text of comments and
        processing instructions, is ordinary document text and is converted
        like any other.
        '''
        tagname = barename(html_tag.tag)
        tag_style = stylizer.style(html_tag)
        # Only the *contents* of these tags are skipped; returning early here
        # would also drop the text that follows them in the parent.
        ignore_tag_contents = tagname in {'script', 'style', 'title', 'meta'} or tag_style.is_hidden
        display = tag_style._get('display')
        is_block = False

        if not ignore_tag_contents:
            previous_link = self.current_link
            if tagname == 'a' and html_tag.get('href'):
                self.current_link = (self.current_item, html_tag.get('href'), html_tag.get('title'))

            is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
            # Only create a float spec if none was inherited from a parent.
            if float_spec is None and is_float:
                float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)

            if display in {'inline', 'inline-block'} or tagname == 'br':  # <br> has display:block but we don't want to start a new paragraph
                if is_float and float_spec.is_dropcaps:
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
                    # The float spec has been used by the block just created,
                    # so it is not handed down to the children.
                    float_spec = None
                else:
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            elif display == 'list-item':
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_list_item=True)
            elif display.startswith('table') or display == 'inline-table':
                if display == 'table-cell':
                    self.blocks.start_new_cell(html_tag, tag_style)
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
                elif display == 'table-row':
                    self.blocks.start_new_row(html_tag, tag_style)
                elif display in {'table', 'inline-table'}:
                    self.blocks.end_current_block()
                    self.blocks.start_new_table(html_tag, tag_style)
            else:
                if tagname == 'img' and is_float:
                    # Image is floating so don't start a new paragraph for it
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
                else:
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)

            # Iterate over all child nodes, not just elements, so that text
            # following a comment or processing instruction is not lost.
            for child in html_tag.iterchildren():
                if callable(child.tag):
                    # Comment/PI/entity: lxml sets .tag to the factory
                    # function for these nodes. The node itself is not
                    # converted, but its tail is text inside html_tag.
                    if child.tail:
                        parent = html_tag.getparent()
                        block = self.blocks.current_or_new_block(parent, stylizer.style(parent))
                        block.add_text(child.tail, tag_style, is_parent_style=False, link=self.current_link)
                else:
                    self.process_tag(child, stylizer, float_spec=float_spec)

            # Captured before finish_tag() is called (presumably finish_tag()
            # closes the block opened for this tag -- confirm in Blocks).
            is_block = html_tag in self.blocks.open_html_blocks
            self.blocks.finish_tag(html_tag)
            if is_block and tag_style['page-break-after'] == 'avoid':
                self.blocks.all_blocks[-1].keep_next = True

            self.current_link = previous_link

        # Now, process the tail if any

        if display == 'table-row':
            return  # We ignore the tail for these tags

        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
            block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True, link=self.current_link)

    def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False, float_spec=None, is_list_item=False):
        '''
        Start a new block for ``html_tag`` and fill it with the tag's own
        content: a bookmark for its ``id``/``name`` anchor, then either the
        image (for ``img``) or the tag's leading text.

        ``is_table_cell``, ``float_spec`` and ``is_list_item`` are passed
        through to ``start_new_block()``.
        '''
        block = self.blocks.start_new_block(html_tag, tag_style, is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item)
        anchor = html_tag.get('id') or html_tag.get('name')
        if anchor:
            block.bookmarks.add(self.bookmark_for_anchor(anchor, html_tag))
        if tagname == 'img':
            self.images_manager.add_image(html_tag, block, stylizer, as_block=True)
        else:
            if html_tag.text:
                block.add_text(html_tag.text, tag_style, ignore_leading_whitespace=True, is_parent_style=True, link=self.current_link)

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        '''
        Add the own content of an inline tag to the block of its parent:
        a break for ``br``, the image for ``img``, otherwise the tag's
        leading text. A bookmark is attached when the tag has an
        ``id``/``name`` anchor.
        '''
        anchor = html_tag.get('id') or html_tag.get('name') or None
        bmark = None
        if anchor:
            bmark = self.bookmark_for_anchor(anchor, html_tag)
        if tagname == 'br':
            # Skip a <br> that is the last element child of its parent and
            # has no tail text.
            if html_tag.tail or html_tag is not tuple(html_tag.getparent().iterchildren('*'))[-1]:
                block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
                # Translate the CSS clear value; anything else maps to 'none'.
                block.add_break(clear={'both':'all', 'left':'left', 'right':'right'}.get(tag_style['clear'], 'none'), bookmark=bmark)
        elif tagname == 'img':
            block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
            self.images_manager.add_image(html_tag, block, stylizer, bookmark=bmark)
        else:
            if html_tag.text:
                block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
                block.add_text(html_tag.text, tag_style, is_parent_style=False, bookmark=bmark, link=self.current_link)

    def bookmark_for_anchor(self, anchor, html_tag):
        '''
        Return the links manager's bookmark for ``anchor`` on ``html_tag``
        within the item currently being processed.
        '''
        return self.links_manager.bookmark_for_anchor(anchor, self.current_item, html_tag)

    def write(self):
        '''
        Create the document skeleton and serialize blocks, styles, images,
        fonts and list numbering into ``self.docx``.
        '''
        self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
        self.blocks.serialize(body)
        body.append(body[0])  # Move <sectPr> to the end
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)

Example #7

Show file

File: from_html.py Project: aimylios/calibre

class Convert(object):
    """Drive the conversion of an OEB book into the parts of a DOCX file.

    An instance is callable: calling it processes every spine item into
    blocks, optionally adds TOC links and a cover, finalizes lists and
    styles, and serializes everything onto the ``docx`` object supplied at
    construction time.
    """

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx, mi, add_cover, add_toc):
        """Store the conversion inputs and size the output profile.

        :param oeb: The OEB book to convert.
        :param docx: Target container; ``log`` and ``opts`` are taken from it.
        :param mi: Metadata object; its ``language`` is passed to the styles
            manager.
        :param add_cover: If true, a cover is added when the book declares
            one that is present in the manifest.
        :param add_toc: If true, TOC links are processed after the spine.
        """
        self.oeb, self.docx, self.add_cover, self.add_toc = oeb, docx, add_cover, add_toc
        self.log, self.opts = docx.log, docx.opts
        self.mi = mi
        self.cover_img = None
        p = self.opts.output_profile
        p.width_pts, p.height_pts = page_effective_area(self.opts)

    def __call__(self):
        """Run the whole conversion and write the result onto ``self.docx``."""
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager(self.docx.namespace, self.log, self.mi.language)
        self.links_manager = LinksManager(self.docx.namespace, self.docx.document_relationships, self.log)
        self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships, self.opts)
        self.lists_manager = ListsManager(self.docx)
        self.fonts_manager = FontsManager(self.docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(self.docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = self.current_lang = None

        for item in self.oeb.spine:
            self.log.debug('Processing', item.href)
            self.process_item(item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        if self.add_cover and self.oeb.metadata.cover:
            cover_id = unicode(self.oeb.metadata.cover[0])
            if cover_id in self.oeb.manifest.ids:
                item = self.oeb.manifest.ids[cover_id]
                self.cover_img = self.images_manager.read_image(item.href)

        all_blocks = self.blocks.all_blocks
        # Every block except the last is resolved against its successor; the
        # last block has no successor and is never examined.
        skipped_positions = []
        for pos, (block, next_block) in enumerate(zip(all_blocks, all_blocks[1:])):
            block.resolve_skipped(next_block)
            if block.skipped:
                skipped_positions.append(pos)
        # Delete from the end so that the remaining positions stay valid.
        for pos in reversed(skipped_positions):
            self.blocks.delete_block_at(pos)
        # A book whose spine produced no blocks at all must not fail with an
        # IndexError here.
        if self.blocks.all_blocks:
            self.blocks.all_blocks[0].is_first_block = True
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        if self.cover_img is not None:
            self.cover_img = self.images_manager.create_cover_markup(self.cover_img, self.opts.preserve_cover_aspect_ratio, *page_size(self.opts))
        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def process_item(self, item):
        """Convert one spine item by processing every ``<body>`` it contains."""
        self.current_item = item
        # Reuse the stylizer cached by the SVG rasterizer when it has one.
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, profile=self.opts.output_profile, base_css=self.base_css)
        self.abshref = self.images_manager.abshref = item.abshref

        self.current_lang = lang_for_tag(item.data) or self.styles_manager.document_lang
        for i, body in enumerate(XPath('//h:body')(item.data)):
            with self.blocks:
                self.blocks.top_bookmark = self.links_manager.bookmark_for_anchor(self.links_manager.top_anchor, self.current_item, body)
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
        """Recursively convert ``html_tag``, its children and its tail text.

        :param html_tag: The element to convert.
        :param stylizer: Provides the resolved style of each element.
        :param is_first_tag: True for the first ``<body>`` of an item; such a
            tag is never treated as floating and its tail is not processed.
        :param float_spec: Float specification inherited from an ancestor, or
            None. One is created here when this tag itself floats.
        """
        tagname = barename(html_tag.tag)
        tag_style = stylizer.style(html_tag)
        ignore_tag_contents = tagname in {'script', 'style', 'title', 'meta'} or tag_style.is_hidden
        display = tag_style._get('display')
        is_block = False

        if not ignore_tag_contents:
            # Link and language apply to this subtree only; both are restored
            # once the children have been processed.
            previous_link = self.current_link
            if tagname == 'a' and html_tag.get('href'):
                self.current_link = (self.current_item, html_tag.get('href'), html_tag.get('title'))
            previous_lang = self.current_lang
            tag_lang = lang_for_tag(html_tag)
            if tag_lang:
                self.current_lang = tag_lang

            is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
            if float_spec is None and is_float:
                float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)

            if display in {'inline', 'inline-block'} or tagname == 'br':  # <br> has display:block but we don't want to start a new paragraph
                if is_float and float_spec.is_dropcaps:
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
                    float_spec = None
                else:
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            elif display == 'list-item':
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_list_item=True)
            elif display.startswith('table') or display == 'inline-table':
                if display == 'table-cell':
                    self.blocks.start_new_cell(html_tag, tag_style)
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
                elif display == 'table-row':
                    self.blocks.start_new_row(html_tag, tag_style)
                elif display in {'table', 'inline-table'}:
                    self.blocks.end_current_block()
                    self.blocks.start_new_table(html_tag, tag_style)
            else:
                if tagname == 'img' and is_float:
                    # Image is floating so don't start a new paragraph for it
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
                else:
                    if tagname == 'hr':
                        # Only the top border style of an <hr> is left as is.
                        for edge in 'right bottom left'.split():
                            tag_style.set('border-%s-style' % edge, 'none')
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)

            for child in html_tag.iterchildren():
                if isinstance(getattr(child, 'tag', None), basestring):
                    self.process_tag(child, stylizer, float_spec=float_spec)
                else:  # Comment/PI/etc.
                    tail = getattr(child, 'tail', None)
                    if tail:
                        # The tail of a child node is content of html_tag, so
                        # the block is resolved against the child's parent
                        # (html_tag), not against html_tag's own parent.
                        block = self.create_block_from_parent(child, stylizer)
                        block.add_text(tail, tag_style, is_parent_style=False, link=self.current_link, lang=self.current_lang)

            is_block = html_tag in self.blocks.open_html_blocks
            self.blocks.finish_tag(html_tag)
            if is_block and tag_style['page-break-after'] == 'avoid':
                self.blocks.all_blocks[-1].keep_next = True

            self.current_link = previous_link
            self.current_lang = previous_lang

        # Now, process the tail if any. This also runs for tags whose
        # contents were ignored.

        if display == 'table-row':
            return  # We ignore the tail for these tags

        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.create_block_from_parent(html_tag, stylizer)
            block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True, link=self.current_link, lang=self.current_lang)

    def create_block_from_parent(self, html_tag, stylizer):
        """Return the current block, or a new one for the parent of ``html_tag``.

        The returned block always has ``page_break_before`` set to False.
        """
        parent = html_tag.getparent()
        block = self.blocks.current_or_new_block(parent, stylizer.style(parent))
        # Do not inherit page-break-before from parent
        block.page_break_before = False
        return block

    def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False, float_spec=None, is_list_item=False):
        """Start a new block for ``html_tag`` and add its image or leading text.

        A bookmark is added to the block when the tag has an ``id`` or
        ``name`` attribute.
        """
        block = self.blocks.start_new_block(
            html_tag, tag_style, is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item)
        anchor = html_tag.get('id') or html_tag.get('name')
        if anchor:
            block.bookmarks.add(self.bookmark_for_anchor(anchor, html_tag))
        if tagname == 'img':
            self.images_manager.add_image(html_tag, block, stylizer, as_block=True)
        else:
            if html_tag.text:
                block.add_text(html_tag.text, tag_style, ignore_leading_whitespace=True, is_parent_style=True, link=self.current_link, lang=self.current_lang)

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add the content of an inline element to the block of its parent.

        ``<br>`` becomes a break, ``<img>`` is handed to the images manager
        and any other tag contributes its leading text. A bookmark is created
        when the tag has an ``id`` or ``name`` attribute.
        """
        anchor = html_tag.get('id') or html_tag.get('name') or None
        bmark = None
        if anchor:
            bmark = self.bookmark_for_anchor(anchor, html_tag)
        if tagname == 'br':
            # A <br> that is the last element child and has no tail text is
            # dropped.
            if html_tag.tail or html_tag is not tuple(html_tag.getparent().iterchildren('*'))[-1]:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_break(clear={'both':'all', 'left':'left', 'right':'right'}.get(tag_style['clear'], 'none'), bookmark=bmark)
        elif tagname == 'img':
            block = self.create_block_from_parent(html_tag, stylizer)
            self.images_manager.add_image(html_tag, block, stylizer, bookmark=bmark)
        else:
            if html_tag.text:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text(html_tag.text, tag_style, is_parent_style=False, bookmark=bmark, link=self.current_link, lang=self.current_lang)
            elif bmark:
                # No text, but the anchor still needs something to attach to.
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text('', tag_style, is_parent_style=False, bookmark=bmark, link=self.current_link, lang=self.current_lang)

    def bookmark_for_anchor(self, anchor, html_tag):
        """Return the links manager's bookmark for ``anchor`` in the current item."""
        return self.links_manager.bookmark_for_anchor(anchor, self.current_item, html_tag)

    def write(self):
        """Create the document skeleton and serialize all collected content.

        The TOC (if the links manager has one) and the cover (if one was
        read) are written into the body after the blocks.
        """
        self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
        self.blocks.serialize(body)
        body.append(body[0])  # Move <sectPr> to the end
        if self.links_manager.toc:
            self.links_manager.serialize_toc(body, self.styles_manager.primary_heading_style)
        if self.cover_img is not None:
            self.images_manager.write_cover_block(body, self.cover_img)
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)

Example #8

Show file

File: from_html.py Project: tomschlenkhoff/calibre

class Convert(object):
    """Drive the conversion of an OEB book into the parts of a DOCX file.

    An instance is callable: calling it processes every spine item into
    blocks, finalizes lists and styles, and serializes everything onto the
    ``docx`` object supplied at construction time.
    """

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx):
        """Remember the source book and the target container.

        :param oeb: The OEB book to convert.
        :param docx: Target container; ``log`` and ``opts`` are taken from it.
        """
        self.oeb, self.docx = oeb, docx
        self.log, self.opts = docx.log, docx.opts

    def __call__(self):
        """Run the whole conversion and write the result onto ``self.docx``."""
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager(self.docx.namespace)
        self.links_manager = LinksManager(self.docx.namespace,
                                          self.docx.document_relationships)
        self.images_manager = ImagesManager(self.oeb,
                                            self.docx.document_relationships)
        self.lists_manager = ListsManager(self.docx)
        self.fonts_manager = FontsManager(self.docx.namespace, self.oeb,
                                          self.opts)
        self.blocks = Blocks(self.docx.namespace, self.styles_manager,
                             self.links_manager)
        self.current_link = None

        for item in self.oeb.spine:
            self.process_item(item)

        all_blocks = self.blocks.all_blocks
        # Every block except the last is resolved against its successor; the
        # last block has no successor and is never examined.
        skipped_positions = []
        for pos, (block, next_block) in enumerate(
                zip(all_blocks, all_blocks[1:])):
            block.resolve_skipped(next_block)
            if block.skipped:
                skipped_positions.append(pos)
        # Delete from the end so that the remaining positions stay valid.
        for pos in reversed(skipped_positions):
            self.blocks.delete_block_at(pos)
        # A book whose spine produced no blocks at all must not fail with an
        # IndexError here.
        if self.blocks.all_blocks:
            self.blocks.all_blocks[0].is_first_block = True

        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def process_item(self, item):
        """Convert one spine item by processing every ``<body>`` it contains.

        A body without an ``id`` is given the links manager's ``top_anchor``
        as its id before it is processed.
        """
        self.current_item = item
        # Reuse the stylizer cached by the SVG rasterizer when it has one.
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            stylizer = Stylizer(item.data,
                                item.href,
                                self.oeb,
                                self.opts,
                                self.opts.output_profile,
                                base_css=self.base_css)
        self.abshref = self.images_manager.abshref = item.abshref

        for i, body in enumerate(XPath('//h:body')(item.data)):
            with self.blocks:
                body.set('id',
                         body.get('id', None) or self.links_manager.top_anchor)
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self,
                    html_tag,
                    stylizer,
                    is_first_tag=False,
                    float_spec=None):
        """Recursively convert ``html_tag``, its children and its tail text.

        The contents of ``script``/``style``/``title``/``meta`` elements and
        of hidden elements are ignored, but the text following such an
        element (its tail) is still added to the parent.

        :param html_tag: The element to convert.
        :param stylizer: Provides the resolved style of each element.
        :param is_first_tag: True for the first ``<body>`` of an item; such a
            tag is never treated as floating and its tail is not processed.
        :param float_spec: Float specification inherited from an ancestor, or
            None. One is created here when this tag itself floats.
        """
        tagname = barename(html_tag.tag)
        tag_style = stylizer.style(html_tag)
        ignore_tag_contents = (tagname in {'script', 'style', 'title', 'meta'}
                               or tag_style.is_hidden)
        display = tag_style._get('display')
        is_block = False

        if not ignore_tag_contents:
            # The link applies to this subtree only and is restored once the
            # children have been processed.
            previous_link = self.current_link
            if tagname == 'a' and html_tag.get('href'):
                self.current_link = (self.current_item, html_tag.get('href'),
                                     html_tag.get('title'))

            is_float = (tag_style['float'] in {'left', 'right'}
                        and not is_first_tag)
            if float_spec is None and is_float:
                float_spec = FloatSpec(self.docx.namespace, html_tag,
                                       tag_style)

            # <br> has display:block but we don't want to start a new
            # paragraph
            if display in {'inline', 'inline-block'} or tagname == 'br':
                if is_float and float_spec.is_dropcaps:
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer,
                                       float_spec=float_spec)
                    float_spec = None
                else:
                    self.add_inline_tag(tagname, html_tag, tag_style,
                                        stylizer)
            elif display == 'list-item':
                self.add_block_tag(tagname, html_tag, tag_style, stylizer,
                                   is_list_item=True)
            elif display.startswith('table') or display == 'inline-table':
                if display == 'table-cell':
                    self.blocks.start_new_cell(html_tag, tag_style)
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer,
                                       is_table_cell=True)
                elif display == 'table-row':
                    self.blocks.start_new_row(html_tag, tag_style)
                elif display in {'table', 'inline-table'}:
                    self.blocks.end_current_block()
                    self.blocks.start_new_table(html_tag, tag_style)
            else:
                if tagname == 'img' and is_float:
                    # Image is floating so don't start a new paragraph for it
                    self.add_inline_tag(tagname, html_tag, tag_style,
                                        stylizer)
                else:
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer,
                                       float_spec=float_spec)

            # NOTE(review): iterchildren('*') yields element children only,
            # so text following a comment or PI is not converted -- confirm
            # whether that is intended.
            for child in html_tag.iterchildren('*'):
                self.process_tag(child, stylizer, float_spec=float_spec)

            is_block = html_tag in self.blocks.open_html_blocks
            self.blocks.finish_tag(html_tag)
            if is_block and tag_style['page-break-after'] == 'avoid':
                self.blocks.all_blocks[-1].keep_next = True

            self.current_link = previous_link

        # Now process the tail, if any. The tail is text of the parent
        # element, so it is handled even when this tag's contents were
        # ignored.
        if display == 'table-row':
            return  # We ignore the tail for these tags

        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (
                not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            parent = html_tag.getparent()
            block = self.blocks.current_or_new_block(parent,
                                                     stylizer.style(parent))
            block.add_text(html_tag.tail,
                           stylizer.style(parent),
                           is_parent_style=True,
                           link=self.current_link)

    def add_block_tag(self,
                      tagname,
                      html_tag,
                      tag_style,
                      stylizer,
                      is_table_cell=False,
                      float_spec=None,
                      is_list_item=False):
        """Start a new block for ``html_tag`` and add its image or leading text.

        A bookmark is added to the block when the tag has an ``id`` or
        ``name`` attribute.
        """
        block = self.blocks.start_new_block(html_tag,
                                            tag_style,
                                            is_table_cell=is_table_cell,
                                            float_spec=float_spec,
                                            is_list_item=is_list_item)
        anchor = html_tag.get('id') or html_tag.get('name')
        if anchor:
            block.bookmarks.add(self.bookmark_for_anchor(anchor, html_tag))
        if tagname == 'img':
            self.images_manager.add_image(html_tag,
                                          block,
                                          stylizer,
                                          as_block=True)
        elif html_tag.text:
            block.add_text(html_tag.text,
                           tag_style,
                           ignore_leading_whitespace=True,
                           is_parent_style=True,
                           link=self.current_link)

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add the content of an inline element to the block of its parent.

        ``<br>`` becomes a break, ``<img>`` is handed to the images manager
        and any other tag contributes its leading text. When the element has
        an ``id`` or ``name`` attribute a bookmark is created for it and
        attached to whatever is emitted.
        """
        anchor = html_tag.get('id') or html_tag.get('name') or None
        bmark = None
        if anchor:
            bmark = self.bookmark_for_anchor(anchor, html_tag)

        def parent_block():
            # Inline content goes into the block of the parent element; the
            # block is only requested once we know something will be added.
            parent = html_tag.getparent()
            return self.blocks.current_or_new_block(parent,
                                                    stylizer.style(parent))

        if tagname == 'br':
            # A <br> that is the last element child and has no tail text is
            # dropped.
            if html_tag.tail or html_tag is not tuple(
                    html_tag.getparent().iterchildren('*'))[-1]:
                block = parent_block()
                block.add_break(clear={
                    'both': 'all',
                    'left': 'left',
                    'right': 'right'
                }.get(tag_style['clear'], 'none'),
                                bookmark=bmark)
        elif tagname == 'img':
            block = parent_block()
            self.images_manager.add_image(html_tag,
                                          block,
                                          stylizer,
                                          bookmark=bmark)
        else:
            text = html_tag.text
            # An element without text but with an anchor (e.g. <a id="x"/>)
            # must still emit an empty run, otherwise the bookmark created
            # above is never attached to anything.
            if text or bmark:
                block = parent_block()
                block.add_text(text or '',
                               tag_style,
                               is_parent_style=False,
                               bookmark=bmark,
                               link=self.current_link)

    def bookmark_for_anchor(self, anchor, html_tag):
        """Return the links manager's bookmark for ``anchor`` in the current item."""
        return self.links_manager.bookmark_for_anchor(anchor,
                                                      self.current_item,
                                                      html_tag)

    def write(self):
        """Create the document skeleton and serialize all collected content."""
        self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
        self.blocks.serialize(body)
        body.append(body[0])  # Move <sectPr> to the end
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles,
                                     self.docx.font_table,
                                     self.docx.embedded_fonts, self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)