Example #1
0
    def pre_convert(self, original_book, book):
        """Run the hooks that must happen before the conversion starts.

        :Args:
          - original_book: book passed to the parent ``pre_convert`` and to
            the image editor conversion helper
          - book: book handed, together with ``original_book``, to the theme
            plugin
        """

        super(Epub3Converter, self).pre_convert(original_book)

        theme = self.theme_plugin
        if theme:
            # a theme plugin is allowed to leave this hook unimplemented
            try:
                theme.pre_convert(original_book, book)
            except NotImplementedError:
                pass

        # TODO: find a better home for this and turn it into a plugin
        image_conversion = ImageEditorConversion(
            original_book, EPUB_DOCUMENT_WIDTH, self)
        self._bk_image_editor_conversion = image_conversion
Example #2
0
    def pre_convert(self, epub_book):
        """Prepare page geometry and helper objects before the conversion.

        Stores ``page_width``/``page_height`` and their ``*_bleed`` variants
        in ``self.config``, gives the theme plugin its ``pre_convert`` hook
        and creates the image editor conversion helper.

        :Args:
          - epub_book: EPUB book object
        """

        # The toc nav must be parsed BEFORE the parent pre_convert runs:
        # parsing it afterwards gives unexpected results regarding the
        # section settings logic.
        self.original_toc_nav = parse_toc_nav(epub_book)

        # the parent pre_convert runs the section settings plugin
        super(MPDFConverter, self).pre_convert(epub_book)

        config = self.config
        settings = config['settings']

        # Not that much needed at the moment
        page_width, page_height = get_page_size(settings)
        config['page_width'] = page_width
        config['page_height'] = page_height

        if settings.get('crop_marks') == 'on':
            # crop marks enabled: grow the page by the crop margin
            if 'crop_margin' in settings:
                crop_margin = int(settings['crop_margin'])
            else:
                crop_margin = CROP_MARGIN

            config['page_width_bleed'] = page_width + crop_margin
            config['page_height_bleed'] = page_height + crop_margin
        else:
            config['page_width_bleed'] = page_width
            config['page_height_bleed'] = page_height

        theme = self.theme_plugin
        if theme:
            try:
                theme.pre_convert(epub_book)
            except NotImplementedError:
                pass

        # create image editor conversion instance
        # TODO: find a better home for this and turn it into a plugin

        # pdf document width: bleed width minus side margin and gutter,
        # read from the config again because the hooks above ran in between
        content_mm = float(self.config['page_width_bleed'])
        settings = self.config['settings']
        margins_mm = float(settings.get('side_margin', 0)) + float(
            settings.get('gutter', 0))
        content_mm -= margins_mm
        inches = content_mm / 10 / 2.54

        # inches * 300: presumably the width in pixels at 300 dpi
        self._bk_image_editor_conversion = ImageEditorConversion(
            epub_book, inches * 300, self)
Example #3
0
    def pre_convert(self, book):
        """Hook executed before the conversion process starts.

        :Args:
          - book: EPUB book object
        """

        super(XHTMLConverter, self).pre_convert(book)

        # build the image editor conversion helper
        # TODO: find a better home for this and turn it into a plugin
        image_conversion = ImageEditorConversion(
            book, XHTML_DOCUMENT_WIDTH, self)
        self._bk_image_editor_conversion = image_conversion
Example #4
0
    def pre_convert(self, book):
        """Called before entire process of conversion is called.

        Parses the original toc nav, runs the parent ``pre_convert``, stores
        the page size (plain and with crop-mark bleed) in ``self.config``,
        gives the theme plugin its ``pre_convert`` hook and, for the ``mpdf``
        converter only, creates the image editor conversion helper.

        :Args:
          - book: EPUB book object
        """

        # we parse the toc nav before pre_convert to get original content
        self.original_toc_nav = parse_toc_nav(book)

        # now we call parent pre_convert to run section settings plugin
        super(MPDFConverter, self).pre_convert(book)

        # Not that much needed at the moment
        self.config['page_width'], self.config['page_height'] = get_page_size(
            self.config['settings'])

        try:
            if self.config['settings'].get('crop_marks') == 'on':
                crop_margin = CROP_MARGIN
            else:
                crop_margin = 0

            self.config['page_width_bleed'] = int(
                round(self.config['page_width'] + crop_margin))
            self.config['page_height_bleed'] = int(
                round(self.config['page_height'] + crop_margin))
        except Exception:
            # Best effort: if the bleed cannot be computed fall back to the
            # plain page size.  ``Exception`` (instead of a bare ``except``)
            # keeps KeyboardInterrupt/SystemExit from being swallowed here.
            self.config['page_width_bleed'] = self.config['page_width']
            self.config['page_height_bleed'] = self.config['page_height']

        if self.theme_plugin:
            try:
                self.theme_plugin.pre_convert(book)
            except NotImplementedError:
                # the theme plugin does not implement this hook
                pass

        # create image editor conversion instance
        # todo move it to more proper place in the future, and create plugin for it

        # calculate pdf document width: bleed width minus side margin and
        # gutter, converted from millimetres to inches
        mm = float(self.config['page_width_bleed'])
        mm -= float(self.config['settings'].get('side_margin', 0)) + float(
            self.config['settings'].get('gutter', 0))
        inches = mm / 10 / 2.54

        # only the converter named 'mpdf' gets the helper here
        if self.name == 'mpdf':
            self._bk_image_editor_conversion = ImageEditorConversion(
                book, inches * 300, self)
Example #5
0
    def pre_convert(self, original_book, book):
        """Run the hooks that must happen before the conversion starts.

        :Args:
          - original_book: book passed to the parent ``pre_convert`` and to
            the image editor conversion helper
          - book: book handed, together with ``original_book``, to the theme
            plugin
        """

        super(EpubConverter, self).pre_convert(original_book)

        theme = self.theme_plugin
        if theme:
            # a theme plugin is allowed to leave this hook unimplemented
            try:
                theme.pre_convert(original_book, book)
            except NotImplementedError:
                pass

        # the image editor conversion helper is only created for the
        # converter named 'epub'
        # TODO: find a better home for this and turn it into a plugin
        if self.name != 'epub':
            return

        project_id = self.config.get("project_id")
        self._bk_image_editor_conversion = ImageEditorConversion(
            original_book, EPUB_DOCUMENT_WIDTH, project_id)
Example #6
0
    def pre_convert(self, original_book, book):
        """Run the parent hook, then create the image editor helper.

        :Args:
          - original_book: book passed to the parent ``pre_convert`` and to
            the image editor conversion helper
          - book: second book object forwarded to the parent ``pre_convert``
        """

        super(MobiConverter, self).pre_convert(original_book, book)

        # build the image editor conversion helper
        # TODO: find a better home for this and turn it into a plugin
        image_conversion = ImageEditorConversion(
            original_book, MOBI_DOCUMENT_WIDTH, self)
        self._bk_image_editor_conversion = image_conversion
Example #7
0
    def pre_convert(self, original_book, book):
        """Run the theme hook and create the image editor helper.

        :Args:
          - original_book: book passed to the theme plugin and to the image
            editor conversion helper
          - book: book handed, together with ``original_book``, to the theme
            plugin
        """

        theme = self.theme_plugin
        if theme:
            # a theme plugin is allowed to leave this hook unimplemented
            try:
                theme.pre_convert(original_book, book)
            except NotImplementedError:
                pass

        # the image editor conversion helper is only created for the
        # converter named 'epub'
        # TODO: find a better home for this and turn it into a plugin
        if self.name != 'epub':
            return

        project_id = self.config.get("project_id")
        self._bk_image_editor_conversion = ImageEditorConversion(
            original_book, EPUB_DOCUMENT_WIDTH, project_id)
Example #8
0
    def pre_convert(self, original_book, book):
        """Run the hooks that must happen before the conversion starts.

        :Args:
          - original_book: book passed to the parent ``pre_convert`` and to
            the image editor conversion helper
          - book: book handed, together with ``original_book``, to the theme
            plugin
        """

        super(Epub3Converter, self).pre_convert(original_book)

        theme = self.theme_plugin
        if theme:
            # a theme plugin is allowed to leave this hook unimplemented
            try:
                theme.pre_convert(original_book, book)
            except NotImplementedError:
                pass

        # TODO: find a better home for this and turn it into a plugin
        image_conversion = ImageEditorConversion(
            original_book, EPUB_DOCUMENT_WIDTH, self)
        self._bk_image_editor_conversion = image_conversion
Example #9
0
    def pre_convert(self, book):
        """Hook executed before the conversion process starts.

        :Args:
          - book: EPUB book object
        """

        # the image editor conversion helper is only created for the
        # converter named 'xhtml'
        # TODO: find a better home for this and turn it into a plugin
        if self.name != 'xhtml':
            return

        project_id = self.config.get("project_id")
        self._bk_image_editor_conversion = ImageEditorConversion(
            book, XHTML_DOCUMENT_WIDTH, project_id)
Example #10
0
    def pre_convert(self, book):
        """Hook executed before the conversion process starts.

        :Args:
          - book: EPUB book object
        """

        super(XHTMLConverter, self).pre_convert(book)

        # build the image editor conversion helper
        # TODO: find a better home for this and turn it into a plugin
        image_conversion = ImageEditorConversion(
            book, XHTML_DOCUMENT_WIDTH, self)
        self._bk_image_editor_conversion = image_conversion
Example #11
0
    def pre_convert(self, epub_book):
        """Run the parent hook, then create the image editor helper.

        :Args:
          - epub_book: EPUB book object
        """

        super(ScreenPDFConverter, self).pre_convert(epub_book)

        # create image editor conversion instance
        # TODO: find a better home for this and turn it into a plugin

        # pdf document width: bleed width minus side margin and gutter,
        # converted from millimetres to inches
        content_mm = float(self.config['page_width_bleed'])
        settings = self.config['settings']
        margins_mm = float(settings.get('side_margin', 0)) + float(
            settings.get('gutter', 0))
        content_mm -= margins_mm
        inches = content_mm / 10 / 2.54

        # inches * 300: presumably the width in pixels at 300 dpi
        image_conversion = ImageEditorConversion(
            epub_book, inches * 300, self)
        self._bk_image_editor_conversion = image_conversion
Example #12
0
    def pre_convert(self, epub_book):
        """Prepare page geometry and helper objects before the conversion.

        Stores ``page_width``/``page_height`` and their ``*_bleed`` variants
        in ``self.config``, gives the theme plugin its ``pre_convert`` hook
        and creates the image editor conversion helper.

        :Args:
          - epub_book: EPUB book object
        """

        # The toc nav must be parsed BEFORE the parent pre_convert runs:
        # parsing it afterwards gives unexpected results regarding the
        # section settings logic.
        self.original_toc_nav = parse_toc_nav(epub_book)

        # the parent pre_convert runs the section settings plugin
        super(MPDFConverter, self).pre_convert(epub_book)

        config = self.config
        settings = config['settings']

        # Not that much needed at the moment
        page_width, page_height = get_page_size(settings)
        config['page_width'] = page_width
        config['page_height'] = page_height

        if settings.get('crop_marks') == 'on':
            # crop marks enabled: grow the page by the crop margin
            if 'crop_margin' in settings:
                crop_margin = int(settings['crop_margin'])
            else:
                crop_margin = CROP_MARGIN

            config['page_width_bleed'] = page_width + crop_margin
            config['page_height_bleed'] = page_height + crop_margin
        else:
            config['page_width_bleed'] = page_width
            config['page_height_bleed'] = page_height

        theme = self.theme_plugin
        if theme:
            try:
                theme.pre_convert(epub_book)
            except NotImplementedError:
                pass

        # create image editor conversion instance
        # TODO: find a better home for this and turn it into a plugin

        # pdf document width: bleed width minus side margin and gutter,
        # read from the config again because the hooks above ran in between
        content_mm = float(self.config['page_width_bleed'])
        settings = self.config['settings']
        margins_mm = float(settings.get('side_margin', 0)) + float(
            settings.get('gutter', 0))
        content_mm -= margins_mm
        inches = content_mm / 10 / 2.54

        # inches * 300: presumably the width in pixels at 300 dpi
        self._bk_image_editor_conversion = ImageEditorConversion(
            epub_book, inches * 300, self
        )
Example #13
0
    def pre_convert(self, book):
        """Called before entire process of conversion is called.

        Parses the original toc nav, runs the parent ``pre_convert``, stores
        the page size (plain and with crop-mark bleed) in ``self.config``,
        gives the theme plugin its ``pre_convert`` hook and, for the ``mpdf``
        converter only, creates the image editor conversion helper.

        :Args:
          - book: EPUB book object
        """

        # we parse the toc nav before pre_convert to get original content
        self.original_toc_nav = parse_toc_nav(book)

        # now we call parent pre_convert to run section settings plugin
        super(MPDFConverter, self).pre_convert(book)

        # Not that much needed at the moment
        self.config['page_width'], self.config['page_height'] = get_page_size(self.config['settings'])

        try:
            if self.config['settings'].get('crop_marks') == 'on':
                crop_margin = CROP_MARGIN
            else:
                crop_margin = 0

            self.config['page_width_bleed'] = int(round(self.config['page_width'] + crop_margin))
            self.config['page_height_bleed'] = int(round(self.config['page_height'] + crop_margin))
        except Exception:
            # Best effort: if the bleed cannot be computed fall back to the
            # plain page size.  ``Exception`` (instead of a bare ``except``)
            # keeps KeyboardInterrupt/SystemExit from being swallowed here.
            self.config['page_width_bleed'] = self.config['page_width']
            self.config['page_height_bleed'] = self.config['page_height']

        if self.theme_plugin:
            try:
                self.theme_plugin.pre_convert(book)
            except NotImplementedError:
                # the theme plugin does not implement this hook
                pass

        # create image editor conversion instance
        # todo move it to more proper place in the future, and create plugin for it

        # calculate pdf document width: bleed width minus side margin and
        # gutter, converted from millimetres to inches
        mm = float(self.config['page_width_bleed'])
        mm -= float(self.config['settings'].get('side_margin', 0)) + float(
            self.config['settings'].get('gutter', 0))
        inches = mm / 10 / 2.54

        # only the converter named 'mpdf' gets the helper here
        if self.name == 'mpdf':
            self._bk_image_editor_conversion = ImageEditorConversion(
                book, inches * 300, self
            )
Example #14
0
class XHTMLConverter(BaseConverter):
    """Converter that packs a book into a zip archive of XHTML files.

    The archive receives the navigation page as ``index.xhtml``, every other
    document under ``TEXT_DIR``, the images referenced by the documents under
    ``IMAGES_DIR`` and a ``custom.css`` under ``STYLE_DIR``.
    """

    name = 'xhtml'

    _images_dir = "images/"

    def __init__(self, *args, **kwargs):
        super(XHTMLConverter, self).__init__(*args, **kwargs)
        self.images_path = os.path.join(self.sandbox_path, self._images_dir)
        self._bk_image_editor_conversion = None
        # ``src`` values of every image seen while fixing the chapters;
        # consumed by _write_images()
        self._all_images_src = set()

    def pre_convert(self, book):
        """Called before entire process of conversion is called.

        :Args:
          - book: EPUB book object
        """

        super(XHTMLConverter, self).pre_convert(book)

        # create image editor conversion instance
        # todo move it to more proper place in the future, and create plugin for it
        self._bk_image_editor_conversion = ImageEditorConversion(
            book, XHTML_DOCUMENT_WIDTH, self)

    def convert(self, original_book, output_path):
        """Write ``original_book`` as a zip archive to ``output_path``.

        :Args:
          - original_book: EPUB book object to convert
          - output_path: path of the zip file to create

        :Returns:
          Dictionary with the ``size`` of the written file.
        """

        logger.debug('[XHTML] XHTMLConverter.convert')

        self.output_file = zipfile.ZipFile(output_path, 'w')

        # close the archive even when one of the steps raises, otherwise
        # the file handle leaks
        try:
            self.pre_convert(original_book)
            self._copy_items(original_book)
            self._write_images()
            self._add_styles()
        finally:
            self.output_file.close()

        return {"size": os.path.getsize(output_path)}

    def _copy_items(self, original_book):
        """
        Populates the book by copying items from the original book
        """

        for item in original_book.get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue

            if isinstance(item, ebooklib.epub.EpubNav):
                # Modify nav.xhtml file from EPUB for our XHTML output
                content = self._fix_chapter(
                    self._clear_nav(item.get_content()))
                self.output_file.writestr('index.xhtml', content)
            elif not isinstance(item, ebooklib.epub.EpubNcx):
                # Ignore NCX file, everything else should be copied
                content = self._fix_chapter(item.get_content())
                file_name = os.path.basename(item.file_name)

                self.output_file.writestr(
                    '{}/{}'.format(TEXT_DIR, file_name), content)

    def _write_images(self):
        """Copy every collected image into the archive under IMAGES_DIR."""

        for src in self._all_images_src:
            file_name = os.path.basename(src)

            try:
                # images are binary data: text mode would corrupt them
                with open(src, 'rb') as img:
                    self.output_file.writestr(
                        '{}/{}'.format(IMAGES_DIR, file_name), img.read())
            except IOError:
                logger.exception("xhtml. Failed to open image for writing.")

    def _clear_nav(self, content):
        """
        Modify navigation page by fixing links and removing unwanted tags.
        """

        root = ebooklib.utils.parse_html_string(content)
        etree.strip_tags(root, 'nav')

        # the nav page ends up at the archive root, chapters in TEXT_DIR
        for _a in root.xpath('//a'):
            _a.set('href', '{}/{}'.format(TEXT_DIR, _a.get('href', '')))

        return etree.tostring(root,
                              pretty_print=True,
                              encoding="utf-8",
                              xml_declaration=True)

    def _fix_chapter(self, content):
        """
        Fix content of the chapter by adding styling, fix image links and reformat endnotes.
        """

        root = ebooklib.utils.parse_html_string(content)

        # todo move it to more proper place in the future, and create plugin for it
        if self._bk_image_editor_conversion:
            try:
                root = self._bk_image_editor_conversion.convert(root)
            except Exception:
                # best effort: keep the unconverted chapter
                logger.exception("xhtml. ImageEditorConversion failed.")

        # save all images src (before _fix_images rewrites them)
        for img_element in root.iter('img'):
            if img_element.get('src'):
                self._all_images_src.add(img_element.get('src'))

        self._fix_images(root)
        self._reformat_endnotes(root)

        head = root.find('head')

        if head is not None:
            etree.SubElement(
                head, "link", {
                    "href": "../{}/custom.css".format(STYLE_DIR),
                    "rel": "stylesheet",
                    "type": "text/css"
                })

        return etree.tostring(root,
                              pretty_print=True,
                              encoding="utf-8",
                              xml_declaration=True)

    def _fix_images(self, root):
        """
        Fix the path of the images to match with IMAGES_DIR
        """

        for element in root.iter('img'):
            src = element.get('src')

            # an <img> without src has nothing to rewrite
            if src is None:
                continue

            path = urllib.unquote(src)

            # if hostname, then it is an image with absolute url
            if urlparse.urlparse(path).hostname:
                continue

            try:
                path = path.decode('utf-8')
            except (UnicodeError, AttributeError):
                # not valid utf-8 (or nothing to decode): keep path as is
                pass

            file_name = os.path.basename(path)
            element.set('src', "../{}/{}".format(IMAGES_DIR, file_name))

    def _reformat_endnotes(self, root):
        """Insert internal link to endnote's body into the sup tag.

        :Args:
          - root: lxml node tree with the chapter content
        """
        reformat_endnotes(root)

    def _add_styles(self):
        """
        Add Styling.
        """

        content = self.config.get('settings', {}).get('styling', u'')

        self.output_file.writestr('{}/custom.css'.format(STYLE_DIR), content)
Example #15
0
    def pre_convert(self, original_book, book):
        """Run the parent hook, then create the image editor helper.

        :Args:
          - original_book: book passed to the parent ``pre_convert`` and to
            the image editor conversion helper
          - book: second book object forwarded to the parent ``pre_convert``
        """

        super(DOCXConverter, self).pre_convert(original_book, book)

        image_conversion = ImageEditorConversion(
            original_book, EPUB_DOCUMENT_WIDTH, self)
        self._bk_image_editor_conversion = image_conversion
Example #16
0
class XHTMLConverter(BaseConverter):
    """Converter that packs a book into a zip archive of XHTML files.

    The archive receives the navigation page as ``index.xhtml``, every other
    document under ``TEXT_DIR``, the images referenced by the documents under
    ``IMAGES_DIR`` and a ``custom.css`` under ``STYLE_DIR``.
    """

    name = 'xhtml'

    def __init__(self, *args, **kwargs):
        super(XHTMLConverter, self).__init__(*args, **kwargs)
        self._bk_image_editor_conversion = None
        # ``src`` values of every image seen while fixing the chapters;
        # consumed by _write_images()
        self._all_images_src = set()

    def pre_convert(self, book):
        """Called before entire process of conversion is called.

        :Args:
          - book: EPUB book object
        """

        # create image editor conversion instance
        # todo move it to more proper place in the future, and create plugin for it
        if self.name == 'xhtml':
            self._bk_image_editor_conversion = ImageEditorConversion(
                book, XHTML_DOCUMENT_WIDTH, self.config.get("project_id")
            )

    def convert(self, original_book, output_path):
        """Write ``original_book`` as a zip archive to ``output_path``.

        :Args:
          - original_book: EPUB book object to convert
          - output_path: path of the zip file to create

        :Returns:
          Dictionary with the ``size`` of the written file.
        """

        logger.debug('[XHTML] XHTMLConverter.convert')

        self.output_file = zipfile.ZipFile(output_path, 'w')

        # close the archive even when one of the steps raises, otherwise
        # the file handle leaks
        try:
            self.pre_convert(original_book)
            self._copy_items(original_book)
            self._write_images()
            self._add_styles()
        finally:
            self.output_file.close()

        return {"size": os.path.getsize(output_path)}

    def _copy_items(self, original_book):
        """
        Populates the book by copying items from the original book
        """

        for item in original_book.get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue

            if isinstance(item, ebooklib.epub.EpubNav):
                # Modify nav.xhtml file from EPUB for our XHTML output
                content = self._fix_chapter(self._clear_nav(item.get_content()))
                self.output_file.writestr('index.xhtml', content)
            elif not isinstance(item, ebooklib.epub.EpubNcx):
                # Ignore NCX file, everything else should be copied
                content = self._fix_chapter(item.get_content())
                file_name = os.path.basename(item.file_name)

                self.output_file.writestr('{}/{}'.format(TEXT_DIR, file_name), content)

    def _write_images(self):
        """Copy every collected image into the archive under IMAGES_DIR."""

        for src in self._all_images_src:
            file_name = os.path.basename(src)

            try:
                # images are binary data: text mode would corrupt them
                with open(src, 'rb') as img:
                    self.output_file.writestr('{}/{}'.format(IMAGES_DIR, file_name), img.read())
            except IOError:
                logger.exception("xhtml. Failed to open image for writing.")

    def _clear_nav(self, content):
        """
        Modify navigation page by fixing links and removing unwanted tags.
        """

        root = ebooklib.utils.parse_html_string(content)
        etree.strip_tags(root, 'nav')

        # the nav page ends up at the archive root, chapters in TEXT_DIR
        for _a in root.xpath('//a'):
            _a.set('href', '{}/{}'.format(TEXT_DIR, _a.get('href', '')))

        return etree.tostring(root, pretty_print=True, encoding="utf-8", xml_declaration=True)

    def _fix_chapter(self, content):
        """
        Fix content of the chapter by adding styling, fix image links and reformat endnotes.
        """

        root = ebooklib.utils.parse_html_string(content)

        # todo move it to more proper place in the future, and create plugin for it
        if self._bk_image_editor_conversion:
            try:
                root = self._bk_image_editor_conversion.convert(root)
            except Exception:
                # best effort: keep the unconverted chapter
                logger.exception("xhtml. ImageEditorConversion failed.")

        # save all images src (before _fix_images rewrites them)
        for img_element in root.iter('img'):
            if img_element.get('src'):
                self._all_images_src.add(img_element.get('src'))

        self._fix_images(root)
        self._reformat_endnotes(root)

        head = root.find('head')

        if head is not None:
            etree.SubElement(head, "link", {"href": "../{}/custom.css".format(STYLE_DIR), "rel": "stylesheet", "type": "text/css"})

        return etree.tostring(root, pretty_print=True, encoding="utf-8", xml_declaration=True)

    def _fix_images(self, root):
        """
        Fix the path of the images to match with IMAGES_DIR
        """

        for element in root.iter('img'):
            src = element.get('src')

            # an <img> without src has nothing to rewrite
            if src is None:
                continue

            path = urllib.unquote(src)

            # if hostname, then it is an image with absolute url
            if urlparse.urlparse(path).hostname:
                continue

            try:
                path = path.decode('utf-8')
            except (UnicodeError, AttributeError):
                # not valid utf-8 (or nothing to decode): keep path as is
                pass

            file_name = os.path.basename(path)
            element.set('src', "../{}/{}".format(IMAGES_DIR, file_name))

    def _reformat_endnotes(self, root):
        """Insert internal link to endnote's body into the sup tag.

        :Args:
          - root: lxml node tree with the chapter content
        """
        reformat_endnotes(root)

    def _add_styles(self):
        """
        Add Styling.
        """

        content = self.config.get('settings', {}).get('styling', u'')

        self.output_file.writestr('{}/custom.css'.format(STYLE_DIR), content)
Example #17
0
class Epub3Converter(BaseConverter):
    """Converter that builds an EPUB book from an original book object.

    ``convert()`` is the entry point; the class attributes below are the
    defaults it falls back to.
    """

    name = 'epub3'
    verbose_name = _('EPUB3')
    support_section_settings = True
    images_color_model = "RGB"

    # title of the 'toc' guide entry when the config has no 'toc_title'
    toc_title = 'toc'
    # writer plugin 'style' option when the config has no 'style'
    default_style = 'style1'
    # language used when the book metadata has no 'language'
    default_lang = DEFAULT_LANG
    # class instantiated by _get_writer_plugin()
    writer_plugin_class = WriterPlugin
    # 'styles/' directory located next to this module
    css_dir = os.path.join(os.path.dirname(__file__), 'styles/')

    # suffix handed to plugin.load_theme_plugin() together with the theme name
    _theme_suffix = 'epub'
    # joined to the sandbox path in __init__ to build images_path
    _images_dir = 'images/'

    # valid extensions to assign right mimetype
    WOFF_FONTS = ['.woff']
    OPENTYPE_FONTS = ['.otf', '.otc', '.ttf', '.ttc']

    def __init__(self, *args, **kwargs):
        """Initialise the converter and reset its theme/image-editor state."""
        super(Epub3Converter, self).__init__(*args, **kwargs)

        sandbox_images = os.path.join(self.sandbox_path, self._images_dir)
        self.images_path = sandbox_images

        # no theme is selected until _init_theme_plugin() runs
        self.theme_name = ''
        self.theme_plugin = None

        # created later by pre_convert()
        self._bk_image_editor_conversion = None

    def _get_theme_plugin(self):
        """Return whatever the plugin loader finds for the current theme."""
        suffix, theme = self._theme_suffix, self.theme_name
        return plugin.load_theme_plugin(suffix, theme)

    def _init_theme_plugin(self):
        if 'theme' in self.config:
            self.theme_name = self.config['theme'].get('id', '')
            tp = self._get_theme_plugin()
            if tp:
                self.theme_plugin = tp(self)
        else:
            self.theme_name = None

    def pre_convert(self, original_book, book):
        """Run the hooks that must happen before the conversion starts.

        :Args:
          - original_book: book passed to the parent ``pre_convert`` and to
            the image editor conversion helper
          - book: book handed, together with ``original_book``, to the theme
            plugin
        """

        super(Epub3Converter, self).pre_convert(original_book)

        theme = self.theme_plugin
        if theme:
            # a theme plugin is allowed to leave this hook unimplemented
            try:
                theme.pre_convert(original_book, book)
            except NotImplementedError:
                pass

        # TODO: find a better home for this and turn it into a plugin
        image_conversion = ImageEditorConversion(
            original_book, EPUB_DOCUMENT_WIDTH, self
        )
        self._bk_image_editor_conversion = image_conversion

    def post_convert(self, original_book, book, output_path):
        """Give the theme plugin, if there is one, its post-conversion hook.

        :Args:
          - original_book: forwarded to the theme plugin
          - book: forwarded to the theme plugin
          - output_path: forwarded to the theme plugin
        """

        theme = self.theme_plugin
        if not theme:
            return

        try:
            theme.post_convert(original_book, book, output_path)
        except NotImplementedError:
            # the theme plugin does not implement this hook
            pass

    def convert(self, original_book, output_path):
        """Build an EPUB from ``original_book`` and write it to ``output_path``.

        The steps below depend on each other and must keep their order.

        :Args:
          - original_book: book object the content is copied from
          - output_path: path handed to the writer and measured afterwards

        :Returns:
          Dictionary with the ``size`` of the file at ``output_path``.
        """
        convert_start = datetime.datetime.now()

        logger.debug('[EPUB] {}.convert'.format(self.__class__.__name__))

        # sets theme_name / theme_plugin, both are read further down
        self._init_theme_plugin()

        epub_book = ebooklib.epub.EpubBook()
        epub_book.FOLDER_NAME = 'OEBPS'

        self.pre_convert(original_book, epub_book)

        epub_book.uid = original_book.uid
        epub_book.title = original_book.title

        # we should define better uri for this
        epub_book.add_prefix('bkterms', 'http://booktype.org/')

        # deep copy, so editing the metadata leaves original_book untouched
        epub_book.metadata = deepcopy(original_book.metadata)
        epub_book.toc = []

        self.direction = self._get_dir(epub_book)

        logger.debug('[EPUB] Edit metadata')
        self._edit_metadata(epub_book)

        # _copy_items fills self.items_by_path, which _make_nav reads
        logger.debug('[EPUB] Copy items')
        self._copy_items(epub_book, original_book)

        logger.debug('[EPUB] Make navigation')
        self._make_nav(epub_book, original_book)

        logger.debug('[EPUB] Add cover')
        self._add_cover(epub_book)

        if self.theme_name:
            self._add_theme_assets(epub_book)

        self.post_convert(original_book, epub_book, output_path)

        logger.debug('[EPUB] Setting writer plugins and options')
        writer_options = {'plugins': self._get_plugins(epub_book, original_book)}

        logger.debug('[EPUB] Writer')
        writer_class = self._get_writer_class()
        epub_writer = writer_class(output_path, epub_book, options=writer_options)

        logger.debug('[EPUB] Process')
        epub_writer.process()

        logger.debug('[EPUB] Write')
        epub_writer.write()

        logger.debug('[END] {}.convert'.format(self.__class__.__name__))

        convert_end = datetime.datetime.now()
        logger.info('Conversion lasted %s.', convert_end - convert_start)

        return {"size": os.path.getsize(output_path)}

    def _get_dir(self, epub_book):
        """Return the lower-cased ``bkterms:dir`` metadata value, or 'ltr'.

        :Args:
          - epub_book: book object whose OPF metadata is inspected
        """
        opf_metadata = epub_book.metadata[ebooklib.epub.NAMESPACES["OPF"]]

        # entries are (value, attributes) pairs; keep the ones flagged as
        # the text direction
        matches = [
            entry for entry in opf_metadata[None]
            if entry[1] and entry[1].get('property', '') == 'bkterms:dir'
        ]

        if matches and len(matches[0]) > 0:
            return matches[0][0].lower()

        return 'ltr'

    def _get_writer_plugin_class(self):
        """Returns the writer plugin class to used by writer"""

        if self.writer_plugin_class:
            return self.writer_plugin_class
        raise ImproperlyConfigured

    def _get_writer_plugin(self, epub_book, original_book):
        """Returns the writer plugin instance with some default options already set up"""

        writer_plugin = self._get_writer_plugin_class()()
        opts = {
            'css': self._add_css_styles(epub_book),
            'style': self.config.get('style', self.default_style),
            'lang': self._get_language(original_book),
            'preview': self.config.get('preview', True)
        }

        writer_plugin.options.update(opts)
        return writer_plugin

    def _get_plugins(self, epub_book, original_book):
        """Build the list of plugins handed to the writer instance.

        :Args:
          - epub_book: forwarded to ``_get_writer_plugin``
          - original_book: forwarded to ``_get_writer_plugin``
        """

        # order: writer plugin, image editor plugin, tag cleanup plugin
        plugins = [self._get_writer_plugin(epub_book, original_book)]
        plugins.append(ImageEditorWriterPlugin(converter=self))
        plugins.append(CleanupTagsWriterPlugin())

        return plugins

    def _get_writer_class(self):
        """Return the writer class this converter writes the book with."""

        writer_class = Epub3Writer
        return writer_class

    def _get_language(self, original_book):
        """
        Returns the book language, if there is no language in metadata (from settings)
        then we use the default language set to the class
        """

        metadata = self._get_data(original_book)
        default = metadata.get('language', self.default_lang)
        return self.config.get('lang', default)

    def _edit_metadata(self, epub_book):
        """Modifies original metadata.

        Drops, from the entries stored under the ``None`` key of the OPF
        metadata, every entry whose attribute dictionary has a ``property``
        equal to ``dcterms:modified`` (the existing 'modified' tag) or
        starting with ``add_meta_terms:`` (the `additional metadata`, which
        here is just garbage).

        :Args:
          - epub_book: book object whose ``metadata`` is edited in place
        """

        opf_metadata = epub_book.metadata[ebooklib.epub.NAMESPACES["OPF"]]

        def _is_unwanted(attrs):
            if not isinstance(attrs, dict):
                return False

            # Entries without a "property" attribute (plain name/content
            # metas for instance) yield None here; calling .startswith() on
            # it used to raise AttributeError.
            prop = attrs.get("property") or ""
            return (prop == "dcterms:modified" or
                    prop.startswith("add_meta_terms:"))

        # entries are (value, attributes) pairs
        opf_metadata[None] = [
            entry for entry in opf_metadata[None]
            if not _is_unwanted(entry[1])
        ]

        # NOTE: probably going to extend this function in future

    def _make_nav(self, epub_book, original_book):
        """Build the toc and the 'text' guide entry from the original toc nav.

        :Args:
          - epub_book: book whose ``toc`` and ``guide`` are filled
          - original_book: book the toc nav is parsed from
        """

        # number of chapter links mapped so far; only the very first one
        # is also added to the guide
        self._num_of_text = 0

        def _to_nav_entry(toc_item):
            # a list in second position means (section title, chapters)
            if isinstance(toc_item[1], list):
                section_title, chapters = toc_item

                section = ebooklib.epub.Section(section_title)
                links = [_to_nav_entry(chapter) for chapter in chapters]

                return (section, links)

            chapter_title, chapter_href = toc_item

            chapter_href = "{}/{}".format(DOCUMENTS_DIR, chapter_href)
            chapter_path = urlparse.urlparse(chapter_href).path

            book_item = self.items_by_path[chapter_path]
            book_item.title = chapter_title

            is_first_text = self._num_of_text <= 0
            self._num_of_text += 1

            if is_first_text:
                epub_book.guide.append({
                    'type': 'text',
                    'href': chapter_href,
                    'title': chapter_title,
                })

            return ebooklib.epub.Link(
                href=chapter_href, title=chapter_title, uid=book_item.id)

        def _is_existing_cover(item):
            if type(item[1]) not in (str, unicode):
                return False
            return os.path.basename(item[1]) == COVER_FILE_NAME

        def _is_empty_section(entry):
            return isinstance(entry, tuple) and len(entry[1]) == 0

        # drop the existing cover first, then map what is left
        source_items = [
            item for item in parse_toc_nav(original_book)
            if not _is_existing_cover(item)
        ]
        entries = [_to_nav_entry(item) for item in source_items]

        # we don't allow empty sections just because epubcheck will
        # raise an error at the moment of evaluating the toc.ncx file
        epub_book.toc = [
            entry for entry in entries if not _is_empty_section(entry)
        ]

    def _copy_items(self, epub_book, original_book):
        """Copy the items of ``original_book`` into ``epub_book``.

        Cover items are left out.  Images, styles and documents are renamed
        so they live under their target directories, documents are put in
        the spine (the nav document first, together with a guide entry) and
        every copied item is indexed in ``self.items_by_path`` by its final
        file name.
        """

        self.items_by_path = {}

        for source_item in original_book.items:
            copied = deepcopy(source_item)
            kind = copied.get_type()
            base_name = os.path.basename(copied.file_name)

            # covers are never carried over
            if self._is_cover_item(copied):
                continue

            if kind == ebooklib.ITEM_IMAGE:
                copied.file_name = '{}/{}'.format(IMAGES_DIR, base_name)

            elif kind == ebooklib.ITEM_STYLE:
                copied.file_name = '{}/{}'.format(STYLES_DIR, base_name)

            elif kind == ebooklib.ITEM_DOCUMENT:
                copied.file_name = '{}/{}'.format(DOCUMENTS_DIR, base_name)

                if not isinstance(copied, ebooklib.epub.EpubNav):
                    epub_book.spine.append(copied)

                    # let the theme plugin rewrite the chapter markup
                    if self.theme_plugin:
                        try:
                            tree = ebooklib.utils.parse_html_string(copied.content)
                            fixed = self.theme_plugin.fix_content(tree)
                            copied.content = etree.tostring(
                                fixed, method='html', encoding='utf-8', pretty_print=True)
                        except NotImplementedError:
                            pass

                    # todo move it to more proper place in the future, and create plugin for it
                    if self._bk_image_editor_conversion:
                        try:
                            tree = ebooklib.utils.parse_html_string(copied.content)
                            converted = self._bk_image_editor_conversion.convert(tree)
                            copied.content = etree.tostring(
                                converted, method='html', encoding='utf-8', pretty_print=True)
                        except:
                            logger.exception("epub ImageEditorConversion failed")
                else:
                    # the nav document leads the spine, is referenced from the
                    # guide and keeps its bare file name
                    epub_book.spine.insert(0, copied)
                    epub_book.guide.insert(0, {
                        'type': 'toc',
                        'href': base_name,
                        'title': self.config.get('toc_title', self.toc_title)
                    })
                    copied.file_name = base_name

            # an NCX copy is replaced by a fresh, empty NCX item
            if isinstance(copied, ebooklib.epub.EpubNcx):
                copied = ebooklib.epub.EpubNcx()

            epub_book.add_item(copied)
            self.items_by_path[copied.file_name] = copied

    def _add_cover(self, epub_book):
        """Attach the cover image named in the config to the resulting EPUB.

        Nothing happens when the config has no ``cover_image`` entry.
        """

        if 'cover_image' not in self.config:
            return

        asset = self.get_asset(self.config['cover_image'])
        language = self.config.get('lang', DEFAULT_LANG)
        add_cover(epub_book, asset, language)

    def _get_theme_style(self):
        """Return the result of ``read_theme_style`` for the current theme and suffix."""
        theme_css = read_theme_style(self.theme_name, self._theme_suffix)
        return theme_css

    def _get_default_style(self):
        """Render the default style template for the current theme suffix.

        The template receives the text direction as ``dir``.
        """
        template_name = 'themes/style_{}.css'.format(self._theme_suffix)
        context = {'dir': self.direction}
        return render_to_string(template_name, context)

    def _add_css_styles(self, epub_book):
        """Add the default, theme and user supplied stylesheets to the book.

        :Args:
          - epub_book: EPUB book object receiving the css items

        :Returns:
          List with the names of the css files that were added.
        """

        def _stylesheet(name, css_text):
            # every stylesheet is stored under STYLES_DIR, its uid is its name
            return ebooklib.epub.EpubItem(
                uid=name,
                content=css_text,
                file_name='{}/{}'.format(STYLES_DIR, name),
                media_type='text/css'
            )

        added = []

        # default style is best effort: a failure is logged and skipped
        try:
            epub_book.add_item(
                _stylesheet('default.css', self._get_default_style()))
            added.append('default.css')
        except Exception as e:
            logger.info('Default style was not added %s.', e)

        if self.theme_name:
            theme_css = self._get_theme_style()

            # the custom theme is a template rendered with the user values
            if self.theme_name == 'custom':
                try:
                    values = json.loads(self.config['theme']['custom'].encode('utf8'))
                    theme_css = Template(theme_css).render(Context(values))
                except:
                    logger.exception("Fails with custom theme.")

            epub_book.add_item(_stylesheet('theme.css', theme_css))
            added.append('theme.css')

        # we need to add css from publishing settings screen
        user_css = self.config.get('settings', {}).get('styling', None)

        if user_css:
            epub_book.add_item(_stylesheet('custom_style.css', user_css))
            added.append('custom_style.css')

        return added



    def _get_theme_assets(self):
        """Return the result of ``read_theme_assets`` for the current theme and suffix."""
        theme_assets = read_theme_assets(self.theme_name, self._theme_suffix)
        return theme_assets

    def _add_theme_assets(self, epub_book):
        """Add the images and fonts shipped with the theme to the book.

        Assets whose content cannot be read (falsy content) are skipped.

        :Args:
          - epub_book: EPUB book object receiving the assets
        """

        theme_assets = self._get_theme_assets()

        for kind, names in theme_assets.iteritems():
            if kind == 'images':
                for asset_path in names:
                    base_name = os.path.basename(asset_path)
                    data = read_theme_asset_content(self.theme_name, asset_path)

                    if not data:
                        continue

                    image_item = ebooklib.epub.EpubImage()
                    image_item.file_name = "{}/{}".format(IMAGES_DIR, base_name)
                    image_item.id = 'theme_image_%s' % uuid.uuid4().hex[:5]
                    image_item.set_content(data)

                    epub_book.add_item(image_item)

            elif kind == 'fonts':
                for asset_path in names:
                    base_name = os.path.basename(asset_path)
                    suffix = os.path.splitext(asset_path)[-1].lower()
                    data = read_theme_asset_content(self.theme_name, asset_path)

                    if not data:
                        continue

                    font_item = ebooklib.epub.EpubItem()
                    font_item.file_name = "{}/{}".format(FONTS_DIR, base_name)
                    font_item.set_content(data)

                    # try to set the right font media type
                    # http://www.idpf.org/epub/301/spec/epub-publications.html#sec-core-media-types
                    if suffix in self.OPENTYPE_FONTS:
                        font_item.media_type = 'application/vnd.ms-opentype'
                    elif suffix in self.WOFF_FONTS:
                        font_item.media_type = 'application/font-woff'

                    epub_book.add_item(font_item)

    def _get_data(self, book):
        """Build the default context for the front and end matter templates.

        The values are read from the metadata of the book.

        :Returns:
          - Dictionary with default data for the templates
        """

        metadata = book.metadata

        context = {
            "title": get_refines(metadata, 'title-type', 'main'),
            "subtitle": get_refines(metadata, 'title-type', 'subtitle'),
            "shorttitle": get_refines(metadata, 'title-type', 'short'),
            "author": get_refines(metadata, 'role', 'aut'),
        }
        context.update({
            "publisher": get_metadata(metadata, 'publisher'),
            "isbn": get_metadata(metadata, 'identifier'),
            "language": get_metadata(metadata, 'language'),
        })
        context["metadata"] = metadata

        return context

    def _is_cover_item(self, item):
        """Tell whether ``item`` is a cover.

        An item is a cover when its exact type is one of the ebooklib cover
        classes or when its file is named ``cover.xhtml``.
        """

        base_name = os.path.basename(item.file_name)

        # exact type match on purpose: subclasses are not treated as covers
        if type(item) in (ebooklib.epub.EpubCover, ebooklib.epub.EpubCoverHtml):
            return True

        return base_name == 'cover.xhtml'
Example #18
0
class MPDFConverter(BaseConverter):
    """
    Converter producing PDF output with the mPDF library.

    This code creates all required files and then passes them to the booktype2mpdf.php script to produce
    the final PDF output.

    These are the files we pass to the PHP script:
    - body.html
    - frontmatter.html
    - endmatter.html
    - style.css
    - config.json

    The config.json file keeps all required input information like page settings, styling, metadata etc.
    This file is really just the way we pass the information to the PHP script.

    style.css is produced from a Django template we defined. It holds all default, theme and custom styling for
    this specific book.

    frontmatter.html, body.html and endmatter.html are produced from Django template files we defined.

    Customisation can be done in two different ways:

    **Templates**

    List of template files:
    - themes/frontmatter_mpdf.html
    - themes/endmatter_mpdf.html
    - themes/body_mpdf.html
    - themes/style_mpdf.css

    **Extending code**

    You need to create your own convert module and extend this class. After that
    you have a set of methods which you can override to take full control over your
    PDF production:

    - pre_convert
    - post_convert
    - get_extra_data
    - get_extra_style
    - get_extra_configuration
    - get_extra_body_data
    - get_metadata
    """

    # converter identifier; also used as the profile name when theme files,
    # templates and per-section settings are looked up
    name = 'mpdf'
    verbose_name = _("Printers' PDF")
    support_section_settings = True
    images_color_model = "CMYK"

    # directory (relative to the sandbox) where book images are saved
    _images_dir = "images/"
    # file names (inside the sandbox) of the rendered PDF and of its HTML source
    _body_pdf_name = "body.pdf"
    _body_html_name = "body.html"

    def __init__(self, *args, **kwargs):
        """Set up the converter state on top of the base converter."""
        super(MPDFConverter, self).__init__(*args, **kwargs)

        # theme related state, filled in when the conversion starts
        self.theme_name = ''
        self.theme_plugin = None
        self._bk_image_editor_conversion = None

        # image item name -> file name mappings
        self.images = dict()
        # absolute path to directory where images are saved
        self.images_path = os.path.join(self.sandbox_path, self._images_dir)

        # css collected for full page images, seeded with the page-end rule
        self._full_page_images_css = 'div.fpi-page-end { page: normalpage; }\n'

    def pre_convert(self, epub_book):
        """Hook executed before the conversion itself starts.

        It stores the original toc, computes the page sizes (with and
        without bleed), runs the theme plugin hook and creates the image
        editor conversion helper.

        :Args:
          - epub_book: EPUB book object
        """

        # we parse the toc nav before pre_convert to get original content
        # take into account that if you call this after calling super pre_convert
        # you will have unexpected results regarding the section settings logic
        self.original_toc_nav = parse_toc_nav(epub_book)

        # now we call parent pre_convert to run section settings plugin
        super(MPDFConverter, self).pre_convert(epub_book)

        # Not that much needed at the moment
        width, height = get_page_size(self.config['settings'])
        self.config['page_width'] = width
        self.config['page_height'] = height

        page_settings = self.config['settings']
        crop_marks_on = page_settings.get('crop_marks') == 'on'

        if not crop_marks_on:
            # no crop marks: the bleed size equals the page size
            self.config['page_width_bleed'] = self.config['page_width']
            self.config['page_height_bleed'] = self.config['page_height']
        else:
            crop_margin = CROP_MARGIN

            if 'crop_margin' in page_settings:
                crop_margin = int(page_settings['crop_margin'])

            self.config['page_width_bleed'] = self.config['page_width'] + crop_margin
            self.config['page_height_bleed'] = self.config['page_height'] + crop_margin

        if self.theme_plugin:
            try:
                self.theme_plugin.pre_convert(epub_book)
            except NotImplementedError:
                pass

        # create image editor conversion instance
        # todo move it to more proper place in the future, and create plugin for it

        # calculate pdf document width: bleed width minus side margin and gutter
        side_margin = self.config['settings'].get('side_margin', 0)
        gutter = self.config['settings'].get('gutter', 0)

        mm = float(self.config['page_width_bleed']) - (float(side_margin) + float(gutter))
        inches = mm / 10 / 2.54

        # width handed to the image editor conversion is inches * 300
        self._bk_image_editor_conversion = ImageEditorConversion(
            epub_book, inches * 300, self
        )

    def post_convert(self, book, output_path):
        """Called after entire process of conversion is done.

        :Args:
          - book: EPUB Book object
          - output_path: file path to output file
        """

        if self.theme_plugin:
            try:
                self.theme_plugin.post_convert(book, output_path)
            except NotImplementedError:
                pass

    def _get_dir(self, epub_book):
        """Return the text direction stored in the book metadata.

        Looks in the OPF metadata for an entry whose ``property`` is
        ``bkterms:dir`` and returns its value in lower case.  Falls back to
        ``'ltr'`` when no such entry exists.
        """

        opf_metadata = epub_book.metadata[ebooklib.epub.NAMESPACES["OPF"]]

        matches = [
            entry for entry in opf_metadata[None]
            if entry[1] and entry[1].get('property', '') == 'bkterms:dir'
        ]

        if len(matches) > 0 and len(matches[0]) > 0:
            first = matches[0]
            return first[0].lower()

        return 'ltr'

    def get_extra_data(self, book):
        """Returns extra data which will be passed to the front matter and end matter templates.

        :Args:
          - book: EPUB Book object

        :Returns:
          Returns dictionary.
        """
        return {}

    def get_extra_body_data(self, book):
        """Returns extra data which will be passed to the body templates.

        :Args:
          - book: EPUB Book object

        :Returns:
          Returns dictionary.
        """

        return {}

    def get_extra_style(self, book):
        """Returns extra data which will be passed to the template rendering styling files.

        :Args:
          - book: EPUB Book object

        :Returns:
          Returns dictionary.
        """

        return {}

    def get_extra_configuration(self):
        """Extra entries merged into the configuration file.

        The configuration file is read by the booktype2mpdf.php script which
        calls the mPDF library.  Override this method to pass additional
        information to the PHP script.

        The ``mpdf`` entry starts from the theme plugin configuration (when
        a plugin is loaded) and is then updated with the options returned by
        ``_get_theme_mpdf_config``.

        :Returns:
          Returns dictionary.
        """

        extra = {'mirror_margins': True}

        if self.theme_plugin:
            extra['mpdf'] = self.theme_plugin.get_mpdf_config()

        # get additional mpdf configuration options
        mpdf_options = extra.setdefault('mpdf', {})
        mpdf_options.update(self._get_theme_mpdf_config())

        return extra

    def get_metadata(self, book):
        """Collect the metadata written to the configuration file.

        The result holds the first value of every Dublin Core entry, every
        OPF entry whose ``property`` starts with ``add_meta_terms:`` and the
        text direction under ``bkterms:dir``.  It is meant to give the
        booktype2mpdf.php script access to the metadata information.

        :Args:
          - book: EPUB Book object

        :Returns:
          Returns dictionary with metadata information.
        """

        dublin_core = book.metadata.get("http://purl.org/dc/elements/1.1/")

        collected = {}
        for name, values in dublin_core.iteritems():
            # only the text of the first value is kept
            collected[name] = values[0][0]

        opf_metadata = book.metadata[ebooklib.epub.NAMESPACES["OPF"]]

        additional = [
            entry for entry in opf_metadata[None]
            if entry[1].get('property', '').startswith('add_meta_terms:')
        ]

        for text, attributes in additional:
            collected[attributes.get('property')] = text

        collected['bkterms:dir'] = self.direction

        return collected

    def _init_theme_plugin(self):
        """
        Loads the plugin of the theme named in the config. If the theme has
        no plugin of its own, the mpdf default one is used. Nothing is loaded
        when the config has no `theme` entry.
        """

        fallback_cls = plugin.MPDFPlugin

        if 'theme' not in self.config:
            return

        self.theme_name = self.config['theme'].get('id', '')
        plugin_cls = plugin.load_theme_plugin(self.name, self.theme_name)

        if plugin_cls:
            self.theme_plugin = plugin_cls(self)
        else:
            self.theme_plugin = fallback_cls(self)

    def convert(self, book, output_path):
        """Run the whole conversion and produce the PDF file.

        All input files for the renderer are written to the sandbox, the
        renderer is executed and the resulting PDF is moved to
        ``output_path``.

        :Args:
          - book: EPUB Book object
          - output_path: directory path where output files will be saved

        :Returns:
          Returns dictionary with number of pages and file size of output file
        """

        started_at = datetime.datetime.now()

        self._init_theme_plugin()
        self.direction = self._get_dir(book)
        self.pre_convert(book)

        # write every file the renderer needs into the sandbox
        self._save_images(book)
        self._create_body(book)
        self._write_configuration(book)
        self._create_frontmatter(book)
        self._create_endmatter(book)

        if self.theme_name != '':
            self._add_theme_assets(book)

        self._write_style(book)

        body_html = os.path.join(self.sandbox_path, self._body_html_name)
        body_pdf = os.path.join(self.sandbox_path, self._body_pdf_name)
        renderer_output = self._run_renderer(body_html, body_pdf)

        os.rename(body_pdf, output_path)

        self.post_convert(book, output_path)

        finished_at = datetime.datetime.now()
        logger.info('Conversion lasted %s.', finished_at - started_at)

        result = {
            "pages": renderer_output.get('pages', 0),
            "size": os.path.getsize(output_path)
        }
        return result

    def _get_chapter_content(self, chapter_item):
        """Return the post-processed HTML of a chapter.

        The body of the chapter is copied, image links are rewritten, links
        and columns are fixed and the theme plugin, the image editor
        conversion and the full page image handling are applied.

        :Returns:
          Returns string with HTML content of the chapter, or an empty
          string when the chapter has no body or cannot be parsed.
        """

        chapter_dir = os.path.dirname(chapter_item.file_name)

        try:
            document = ebooklib.utils.parse_html_string(chapter_item.content)
            body = document.find("body")

            if body is None:
                return u''

            # work on a copy so the parsed document stays untouched
            node = deepcopy(body)
            self._fix_images(node, chapter_dir)
            node = self._fix_content(node)

            if self.theme_plugin:
                try:
                    node = self.theme_plugin.fix_content(node)
                except NotImplementedError:
                    pass

            # todo move it to more proper place in the future, and create plugin for it
            if self._bk_image_editor_conversion:
                try:
                    node = self._bk_image_editor_conversion.convert(node)
                except:
                    logger.exception("mpdf ImageEditorConversion failed")

            self._fix_full_page_image(node)

            serialized = etree.tostring(
                node, method='html', encoding='utf-8', pretty_print=True)

            # drop the first 6 and the last 9 characters -- presumably the
            # wrapping <body> tag and its closing tag plus newline
            return serialized[6:-9]
        except etree.XMLSyntaxError:
            return u''

    def _fix_horrible_mpdf(self, content):
        content = content.replace('></columnbreak>', " />\n")
        content = content.replace('></columns>', " />\n")

        return content

    def _create_body(self, book):
        """Create body html file with main content of the book.

        The original toc (``self.original_toc_nav``) is flattened into a list
        of ``TocItem`` objects (sections and chapters, each with its nesting
        level), which is rendered with the body template of the theme and
        written to the sandbox.

        Created html file will be used by booktype2mpdf.php script
        to create final PDF file.

        :Args:
          - book: EPUB Book object
        """

        settings_dict = get_sections_settings(book)

        def _toc(depth, toc_items, parent=None, toc_setting=None):
            """Flatten ``toc_items`` into a list of TocItem objects.

            ``parent`` is the title of the enclosing section (if any) and
            ``toc_setting`` the toc setting inherited from it.
            """
            items = []
            # position of the section among the visible sections of this
            # level; hidden sections do not advance it
            # NOTE(review): confirm this matches how section keys are numbered
            sec_count = 1

            for toc_item in toc_items:
                # SECTIONS
                if isinstance(toc_item[1], list):
                    section_title, chapters = toc_item
                    url_title = booktype_slugify(section_title)

                    # let's build a section key and try to get settings for current section
                    section_key = SectionsSettingsPlugin.build_section_key(url_title, sec_count)
                    section_settings = json.loads(settings_dict.get(section_key, '{}'))
                    # the section setting replaces the inherited one from
                    # here on, also for items that follow in this loop
                    toc_setting = section_settings.get('toc', {}).get(self.name, '')

                    # jump to next item (continue) if the whole section should be hidden
                    show_in_outputs = section_settings.get('show_in_outputs', {})
                    show_section_in_current_converter = show_in_outputs.get(self.name, True)
                    if not show_section_in_current_converter:
                        continue

                    toc_item = TocItem({
                        'type': 'section',
                        'level': depth,
                        'title': section_title,
                        'url_title': url_title,
                        'show_in_toc': 'hide_section' not in toc_setting
                    })
                    items.append(toc_item)
                    items += _toc(depth + 1, chapters, section_title, toc_setting)
                    sec_count += 1

                # CHAPTERS
                else:
                    chapter_title, chapter_href = toc_item
                    chapter_item = book.get_item_with_href(chapter_href)
                    content = self._get_chapter_content(chapter_item)
                    content = self._fix_horrible_mpdf(content)

                    href_filename = os.path.splitext(chapter_href)[0]

                    # chapters outside of any section have no toc setting
                    if not parent:
                        toc_setting = ''

                    toc_item = TocItem({
                        'type': 'chapter',
                        'level': depth,
                        'title': chapter_title,
                        'url_title': booktype_slugify(chapter_title),
                        'href': chapter_href,
                        'href_filename': href_filename,
                        'content': content,
                        'show_in_toc': 'hide_chapters' not in toc_setting
                    })
                    items.append(toc_item)

            return items

        book_toc = _toc(0, self.original_toc_nav)

        data = self._get_data(book)
        data.update(self.get_extra_data(book))
        data.update({
            'book_items': book_toc
        })

        body_name = get_body(self.theme_name, self.name)
        html = render_to_string(body_name, data)
        html_path = os.path.join(self.sandbox_path, self._body_html_name)

        # the context manager closes the file even when writing fails
        with codecs.open(html_path, 'wt', 'utf8') as f:
            f.write(html)

    def _write_style(self, book):
        """Creates style file.

        Style file will include default styling, the css collected for full
        page images, theme styling and custom styling provided by the user,
        in that order.

        Nothing is written when the config has no ``settings`` entry.

        Created style file will be used by booktype2mpdf.php script
        to create final PDF file.

        :Args:
          - book: EPUB Book object
        """

        if 'settings' not in self.config:
            return

        css_style = create_default_style(self.config, self.name, self.get_extra_style(book))
        theme_style = u''

        if self.theme_name != '':
            theme_style = read_theme_style(self.theme_name, self.name)

            # rendering the theme is best effort: on failure the error is
            # logged and the unrendered theme style is written
            try:
                if self.theme_name == 'custom':
                    # values of the custom theme are merged into the config
                    # so that the theme template can use them
                    custom = self.config['theme'].pop('custom', '{}')
                    custom = json.loads(custom.encode('utf-8'))
                    self.config.update(custom)

                tmpl = Template(theme_style)
                ctx = Context(self.config)
                theme_style = tmpl.render(ctx)
            except Exception:
                logger.exception("Writing styles failed for `%s` theme.", self.theme_name)

        # a styling entry that is present but empty (None) must not break
        # the write below
        custom_style = self.config.get('settings', {}).get('styling', u'') or u''

        # add css for fpi
        css_style += self._full_page_images_css

        # the context manager closes the file even when writing fails
        with codecs.open('{}/style.css'.format(self.sandbox_path), 'wt', 'utf8') as f:
            f.write(css_style)
            f.write(theme_style)
            f.write(custom_style)

    def _write_configuration(self, book):
        """Creates configuration file for booktype2mpdf.php script.

        Configuration file is read by the booktype2mpdf.php script. It is
        how we pass information to the PHP script which will finally
        create and format the PDF file.

        The file holds the book metadata, the converter config and whatever
        ``get_extra_configuration`` returns, serialized as JSON.

        :Args:
          - book: EPUB Book object
        """

        dc_metadata = self.get_metadata(book)

        data = {'metadata': dc_metadata, 'config': self.config}
        data.update(self.get_extra_configuration())

        config_path = '{}/config.json'.format(self.sandbox_path)

        # the context manager closes the file even when writing fails
        with codecs.open(config_path, 'wt', 'utf8') as f:
            f.write(unicode(json.dumps(data), 'utf8'))

    def _save_images(self, book):
        """Write every image of the EPUB file to the images directory.

        The directory is created first when it does not exist yet.

        :Args:
          - book: EPUB Book object
        """

        target_dir = self.images_path
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        for image_item in book.get_items_of_type(ebooklib.ITEM_IMAGE):
            self._save_image(image_item)

    def _save_image(self, item):
        """Saves single image to the temporary library.

        :Args:
          - item: ebooklib item object
        """

        file_name = os.path.basename(item.file_name)
        file_path = os.path.join(self.images_path, file_name)

        if os.path.exists(file_path):
            file_name = '{}-{}'.format(item.id, file_name)
            file_path = os.path.join(self.images_path, file_name)

        with open(file_path, 'wb') as file:
            file.write(item.content)

        self.images[item.file_name] = file_name

    def _fix_columns(self, content):
        """Translate column markup into the mPDF ``columns`` / ``columnbreak`` tags.

        A ``columns`` element is inserted before every ``bk-columns`` div
        and, unless the div is a ``bk-marker``, a single-column ``columns``
        element after it; the div tag itself is then dropped.  Divs with the
        ``bk-column-break`` class become ``columnbreak`` elements.

        :Args:
          - content: lxml node tree with the chapter content

        """
        for block in content.xpath("//div[contains(@class, 'bk-columns')]"):
            opening = etree.Element('columns', {
                'column-count': block.get('data-column', '3'),
                'vAlign': block.get('data-valign', ''),
                'column-gap': block.get('data-gap', '5')
            })

            container = block.getparent()
            container.insert(container.index(block), opening)

            # a marker only opens the columns, it does not close them
            is_marker = 'bk-marker' in block.get('class')
            if not is_marker:
                closing = etree.Element('columns', {'column-count': '1'})
                container.insert(container.index(block) + 1, closing)

            block.drop_tag()

        for break_div in content.xpath("//div[@class='bk-column-break']"):
            break_div.tag = 'columnbreak'
            del break_div.attrib['class']

    def _fix_broken_links(self, content):
        """Removes links from the output and replaces them with textual url.

        :Args:
          - content: lxml node tree with the chapter content

        """

        for link in content.iter('a'):
            if link.attrib.get('href', '') != '':
                text = link.tail or ''
                link.tail = ' [' + link.attrib.get('href', '') + ']' + text
                link.tag = 'span'

    def _fix_content(self, content):
        """Removes unwanted formatting from the content.

        This function will remove links from the content and put
        URL link outside of the link.

        :Args:
          - content: lxml node tree with the chapter content

        :Returns:
          - Returns changed lxml node tree
        """

        if content is None:
            return content

        self._fix_broken_links(content)
        self._fix_columns(content)

        # Fix links to other URL places
        return content

    def _fix_images(self, root, base_path):
        """Fixes links to the images in the content.

        :Args:
          - root: lxml node tree with the content
          - base_path: directory where our images are placed
        """

        for element in root.iter('img'):
            _src = element.get('src', None)

            if _src is None:
                continue
            src_url = urllib2.unquote(_src)
            item_name = os.path.normpath(os.path.join(base_path, src_url))
            try:
                file_name = self.images[item_name]
                element.set('src', self._images_dir + file_name)
            except Exception as e:
                # make sure to delete style attribute to avoid mpdf gets broken
                # TODO: this should be handled from the image editor. Fix it later
                item_style = element.get('style', '')
                if 'transform' in item_style:
                    del element.attrib['style']

                # TODO: discuss what to do in case of missing image

                logger.error(
                    'MPDF::_fix_images: image not found %s (%s)' %
                    (item_name, e)
                )
                continue

    def _fix_full_page_image(self, content):
        """Convert full page images (``fpi``) into css driven frames.

        For every image with the ``fpi`` class the caption is removed, the
        wrapping div gets a unique id and is followed by a
        ``fpi-page-end`` div, the ``img`` element is removed and the css
        rendered for the frame is appended to ``self._full_page_images_css``.

        :Args:
          - content: lxml node tree with the chapter content

        :Returns:
          - Returns the same (changed) lxml node tree
        """

        # find fpi (full page image) frame
        for fpi_img in content.xpath('.//img[contains(@class, "fpi")]'):
            frame = fpi_img.getparent()
            group = frame.getparent()

            # remove caption
            captions = group.xpath('.//div[@class="caption_small"]')
            if captions:
                group.remove(captions[0])

            # set id
            frame_id = "fpi-{}".format(str(uuid.uuid4()))
            frame.attrib['id'] = frame_id

            # add fpi end
            page_end = etree.XML('<div class="fpi-page-end"></div>')
            group.insert(group.index(frame) + 1, page_end)

            # image src is taken from the first image inside the frame
            first_img = frame.xpath('.//img')[0]
            image_src = first_img.attrib['src']

            # remove <img/>
            frame.remove(first_img)

            # add css for current fpi
            self._full_page_images_css += render_to_string(
                'convert/full_page_image_mpdf.css', {
                    'fpi_id': frame_id,
                    'fpi_src': image_src
                }
            )

            # remove group_img block
            group.drop_tag()

        return content

    def _get_data(self, book):
        """Build the default context for the front and end matter templates.

        Besides the metadata of the book it holds the text direction and
        the header/footer switches.  Header and footer are shown when the
        config has no ``settings``; otherwise only when the matching
        setting is ``'on'``.

        :Returns:
          - Dictionary with default data for the templates
        """

        if 'settings' in self.config:
            page_settings = self.config['settings']
            show_header = page_settings.get('show_header', '') == 'on'
            show_footer = page_settings.get('show_footer', '') == 'on'
        else:
            show_header = show_footer = True

        metadata = book.metadata

        context = {
            "title": get_refines(metadata, 'title-type', 'main'),
            "subtitle": get_refines(metadata, 'title-type', 'subtitle'),
            "shorttitle": get_refines(metadata, 'title-type', 'short'),
            "author": get_refines(metadata, 'role', 'aut'),
        }
        context.update({
            "publisher": get_metadata(metadata, 'publisher'),
            "isbn": get_metadata(metadata, 'identifier'),
            "language": get_metadata(metadata, 'language'),
            "dir": self.direction,
        })
        context["metadata"] = metadata
        context["show_header"] = show_header
        context["show_footer"] = show_footer

        return context

    def _create_frontmatter(self, book):
        """Creates front matter file.

        The default template data, extended with ``get_extra_data``, is
        rendered with the front matter template of the theme and written to
        ``frontmatter.html`` in the sandbox.

        Front matter HTML file will be used by booktype2mpdf.php script to create
        PDF file.

        :Args:
          - book: EPUB Book object
        """

        data = self._get_data(book)
        data.update(self.get_extra_data(book))

        frontmatter_name = get_single_frontmatter(self.theme_name, self.name)
        html = render_to_string(frontmatter_name, data)

        frontmatter_path = '{}/frontmatter.html'.format(self.sandbox_path)

        # the context manager closes the file even when writing fails
        with codecs.open(frontmatter_path, 'wt', 'utf8') as f:
            f.write(html)

    def _create_endmatter(self, book):
        """Creates end matter file.

        The default template data, extended with ``get_extra_data``, is
        rendered with the end matter template of the theme and written to
        ``endmatter.html`` in the sandbox.

        End matter HTML file will be used by booktype2mpdf.php script to create
        PDF file.

        :Args:
          - book: EPUB Book object
        """

        data = self._get_data(book)
        data.update(self.get_extra_data(book))

        endmatter_name = get_single_endmatter(self.theme_name, self.name)
        html = render_to_string(endmatter_name, data)

        endmatter_path = '{}/endmatter.html'.format(self.sandbox_path)

        # the context manager closes the file even when writing fails
        with codecs.open(endmatter_path, 'wt', 'utf8') as f:
            f.write(html)

    def _add_theme_assets(self, book):
        """Copy all the assets from the theme to the sandbox directory.

        Images and fonts of the theme are written to the ``assets``
        directory of the sandbox under their base name.  Writing is best
        effort: assets without content and assets that cannot be written
        are skipped.

        :Args:
          - book: EPUB book object
        """

        assets = read_theme_assets(self.theme_name, self.name)
        assets_dir = '{}/assets/'.format(self.sandbox_path)

        def _write(name, content):
            """Write one asset into the assets directory of the sandbox."""
            # nothing to write when the asset could not be read; the EPUB
            # converter's _add_theme_assets skips such assets the same way
            if not content:
                return

            try:
                os.makedirs(assets_dir)
            except OSError:
                # raised when the directory is already there; any other
                # problem surfaces (and is ignored) when opening the file
                pass

            target = '{}/assets/{}'.format(self.sandbox_path, name)

            # never write outside of the sandbox
            if not os.path.normpath(target).startswith(self.sandbox_path):
                return

            try:
                # the context manager closes the file even when writing fails
                with open(target, 'wb') as f:
                    f.write(content)
            except IOError:
                pass

        for asset_type, asset_list in assets.iteritems():
            # images and fonts are copied the same way, other asset types
            # are ignored
            if asset_type not in ('images', 'fonts'):
                continue

            for asset_name in asset_list:
                name = os.path.basename(asset_name)
                content = read_theme_asset_content(self.theme_name, asset_name)

                _write(name, content)

    def _run_renderer(self, html_path, pdf_path):
        """Run the booktype2mpdf.php script which creates the PDF file.

        The script is executed with the PHP binary from the settings and its
        standard output is parsed as JSON.  Any failure is logged and an
        empty dictionary is returned.

        :Args:
          - html_path: path to the html file
          - pdf_path: path to the output PDF file

        :Returns:
          Returns dictionary with output returned by the PHP script
        """

        mpdf_dir = settings.MPDF_DIR
        php_binary = settings.PHP_PATH
        script = settings.MPDF_SCRIPT

        command = [
            php_binary,
            script,
            '--mpdf={}'.format(mpdf_dir),
            '--dir={}'.format(self.sandbox_path),
            '--output={}'.format(pdf_path),
        ]

        try:
            (_, out, err) = utils.run_command(command)

            logger.info('MPDF Converter::Stdout: output: "{}", error: "{}"'.format(out, err))

            return json.loads(out)
        except Exception as e:
            logger.error(
                'MPDF Converter::Fail running the command "{}".'.format(e))
            return {}

    def _get_theme_mpdf_config(self):
        """
        Reads the info.json file of the theme and returns the `options`
        defined for this converter under `output`, or an empty dictionary
        when there are none.
        """

        info_path = '{}/themes/{}/info.json'.format(settings.BOOKTYPE_ROOT, self.theme_name)
        theme_info = read_theme_info(info_path)

        if 'output' not in theme_info:
            return {}

        outputs = theme_info['output']
        if self.name not in outputs:
            return {}

        return outputs[self.name].get('options', {})
Example #19
0
class MPDFConverter(BaseConverter):
    """Converter which produces PDF output through the booktype2mpdf.php script.

    This code creates all required files and then passes them to the
    booktype2mpdf.php script to produce the final PDF output.

    These are the files we pass to the PHP script:
    - body.html
    - frontmatter.html
    - endmatter.html
    - style.css
    - config.json

    The config.json file keeps all required input information like page
    settings, styling, metadata, etc. This file is simply the way we pass
    the information to the PHP script.

    style.css is produced from the Django template we defined. It holds all
    default, theme and custom styling for this specific book.

    frontmatter.html, body.html and endmatter.html are produced from the
    Django template files we defined.

    Customisation can be done in two different ways:

    **Templates**

    List of template files:
    - themes/frontmatter_mpdf.html
    - themes/endmatter_mpdf.html
    - themes/body_mpdf.html
    - themes/style_mpdf.css

    **Extending code**

    You need to create your own convert module and extend this class. After
    that you have a set of methods which you can override to take full
    control over your PDF production:

    - pre_convert
    - post_convert
    - get_extra_data
    - get_extra_style
    - get_extra_configuration
    - get_extra_body_data
    - get_metadata
    """

    # converter name; also used as the output profile when reading theme data
    name = "mpdf"

    # directory (relative to the sandbox) where the book images are saved
    _images_dir = "images/"
    # file names, inside the sandbox, of the rendered PDF and the body HTML
    _body_pdf_name = "body.pdf"
    _body_html_name = "body.html"

    def __init__(self, *args, **kwargs):
        """Initialises the converter state on top of the base converter."""

        super(MPDFConverter, self).__init__(*args, **kwargs)

        # theme data stays empty until _init_theme_plugin() reads the config
        self.theme_name = ''
        self.theme_plugin = None

        # created in pre_convert()
        self._bk_image_editor_conversion = None

        # image item name -> file name mappings
        self.images = {}
        # absolute path to directory where images are saved
        self.images_path = os.path.join(self.sandbox_path, self._images_dir)

    def pre_convert(self, book):
        """Called before entire process of conversion is called.

        Computes the page size (with and without bleed), gives the theme
        plugin a chance to run its own ``pre_convert`` and creates the image
        editor conversion helper.

        :Args:
          - book: EPUB book object
        """

        super(MPDFConverter, self).pre_convert(book)

        # Not that much needed at the moment
        self.config['page_width'], self.config['page_height'] = get_page_size(
            self.config['settings'])

        try:
            if 'crop_marks' in self.config['settings'] and self.config[
                    'settings']['crop_marks'] == 'on':
                crop_margin = CROP_MARGIN
            else:
                crop_margin = 0

            self.config['page_width_bleed'] = int(
                round(self.config['page_width'] + crop_margin))
            self.config['page_height_bleed'] = int(
                round(self.config['page_height'] + crop_margin))
        except (TypeError, ValueError, OverflowError):
            # Page size could not be treated as a finite number; fall back
            # to the raw values. Only conversion errors are handled here so
            # that KeyboardInterrupt/SystemExit are no longer swallowed.
            self.config['page_width_bleed'] = self.config['page_width']
            self.config['page_height_bleed'] = self.config['page_height']

        if self.theme_plugin:
            try:
                self.theme_plugin.pre_convert(book)
            except NotImplementedError:
                pass

        # create image editor conversion instance
        # TODO move it to more proper place in the future, and create plugin for it

        # calculate pdf document width: page width without margin and gutter
        mm = float(self.config['page_width_bleed'])
        mm -= float(self.config['settings'].get('side_margin', 0)) + float(
            self.config['settings'].get('gutter', 0))
        inches = mm / 10 / 2.54

        # subclasses with a different name do not get the helper from here
        if self.name == 'mpdf':
            # width is passed as inches * 300
            self._bk_image_editor_conversion = ImageEditorConversion(
                book, inches * 300, self)

    def post_convert(self, book, output_path):
        """Called after entire process of conversion is done.

        :Args:
          - book: EPUB Book object
          - output_path: file path to output file
        """

        if self.theme_plugin:
            try:
                self.theme_plugin.post_convert(book, output_path)
            except NotImplementedError:
                pass

    def _get_dir(self, epub_book):
        """Returns the text direction stored in the book metadata.

        Looks in the OPF metadata for an entry whose ``property`` attribute
        is ``bkterms:dir`` and returns its lower-cased value.

        :Args:
          - epub_book: EPUB Book object

        :Returns:
          Direction string, ``'ltr'`` when no such entry is found.
        """

        opf_meta = epub_book.metadata[ebooklib.epub.NAMESPACES["OPF"]]

        matches = [
            entry for entry in opf_meta[None]
            if entry[1] and entry[1].get('property', '') == 'bkterms:dir'
        ]

        if matches and len(matches[0]) > 0:
            return matches[0][0].lower()

        return 'ltr'

    def get_extra_data(self, book):
        """Extension hook: extra data for the front matter and end matter templates.

        The default implementation adds nothing.

        :Args:
          - book: EPUB Book object

        :Returns:
          Returns empty dictionary.
        """
        return dict()

    def get_extra_body_data(self, book):
        """Extension hook: extra data for the body templates.

        The default implementation adds nothing.

        :Args:
          - book: EPUB Book object

        :Returns:
          Returns empty dictionary.
        """

        return dict()

    def get_extra_style(self, book):
        """Extension hook: extra data for the template rendering the styling files.

        The default implementation adds nothing.

        :Args:
          - book: EPUB Book object

        :Returns:
          Returns empty dictionary.
        """

        return dict()

    def get_extra_configuration(self):
        """Returns extra data which will be passed to the configuration file.

        Configuration file is read by booktype2mpdf.php script which calls mPDF library.
        The idea is that we can extend this method and pass some additional information
        to the PHP script.

        :Returns:
          Returns dictionary.
        """

        data = {'mirror_margins': True}

        if self.theme_plugin:
            data['mpdf'] = self.theme_plugin.get_mpdf_config()

        # get additional mpdf configuration options
        data.setdefault('mpdf', {}).update(self._get_theme_mpdf_config())

        return data

    def get_metadata(self, book):
        """Returns metadata which will be passed to the PHP script.

        The returned metadata is written to the configuration file, so the
        booktype2mpdf.php script can also read it.

        The result contains the first value of every Dublin Core entry, the
        additional metadata (OPF entries whose ``property`` starts with
        ``add_meta_terms:``) keyed by that property, and the text direction
        under ``bkterms:dir``.

        :Args:
          - book: EPUB Book object

        :Returns:
          Returns dictionary with metadata information.
        """

        dc_namespace = "http://purl.org/dc/elements/1.1/"

        # A book without any Dublin Core metadata yields an empty mapping
        # instead of failing on ``None``.
        dc_metadata = {
            key: value[0][0]
            for key, value in book.metadata.get(dc_namespace, {}).items()
        }

        opf_meta = book.metadata[ebooklib.epub.NAMESPACES["OPF"]]

        def _is_additional(entry):
            # entries may carry no attributes at all (same guard as _get_dir)
            attrs = entry[1]
            return bool(attrs) and attrs.get(
                'property', '').startswith('add_meta_terms:')

        for entry in opf_meta[None]:
            if not _is_additional(entry):
                continue

            text, attrs = entry
            dc_metadata[attrs.get('property')] = text

        dc_metadata['bkterms:dir'] = self.direction

        return dc_metadata

    def _init_theme_plugin(self):
        """
        Loads the plugin of the theme named in the configuration. When the
        theme has no custom plugin, the default mpdf plugin is used instead.
        Nothing happens when the configuration has no ``theme`` entry.
        """

        fallback_class = plugin.MPDFPlugin

        if 'theme' not in self.config:
            return

        self.theme_name = self.config['theme'].get('id', '')
        plugin_class = plugin.load_theme_plugin(self.name, self.theme_name)

        if not plugin_class:
            plugin_class = fallback_class

        self.theme_plugin = plugin_class(self)

    def convert(self, book, output_path):
        """Starts conversion process.

        Prepares all the input files in the sandbox directory, runs the
        renderer and moves the produced PDF file to ``output_path``.

        :Args:
          - book: EPUB Book object
          - output_path: file path where the PDF file will be moved to

        :Returns:
          Returns dictionary with number of pages and file size of output file
        """

        started_at = datetime.datetime.now()

        self._init_theme_plugin()
        self.direction = self._get_dir(book)
        self.pre_convert(book)

        # write all the files read by the PHP script
        self._save_images(book)
        self._create_body(book)
        self._write_configuration(book)
        self._create_frontmatter(book)
        self._create_endmatter(book)

        if self.theme_name != '':
            self._add_theme_assets(book)

        self._write_style(book)

        sandbox = self.sandbox_path
        body_html = os.path.join(sandbox, self._body_html_name)
        body_pdf = os.path.join(sandbox, self._body_pdf_name)
        renderer_output = self._run_renderer(body_html, body_pdf)

        os.rename(body_pdf, output_path)

        self.post_convert(book, output_path)

        finished_at = datetime.datetime.now()
        logger.info('Conversion lasted %s.', finished_at - started_at)

        page_count = renderer_output.get('pages', 0)
        file_size = os.path.getsize(output_path)

        return {"pages": page_count, "size": file_size}

    def _get_chapter_content(self, chapter_item):
        """Returns content of the chapter after some postprocessing.

        This function will also fix certain things in the content: it
        rewrites the links pointing to images, removes links for the PDF
        output, lets the theme plugin adjust the content and applies the
        image editor conversion.

        :Args:
          - chapter_item: book item with ``file_name`` and ``content``

        :Returns:
          Returns string with HTML content of the chapter. Empty string is
          returned when the chapter has no body or can not be parsed.
        """

        base_path = os.path.dirname(chapter_item.file_name)

        try:
            chapter = ebooklib.utils.parse_html_string(chapter_item.content)
            chapter_child = chapter.find("body")

            if chapter_child is not None:
                # work on a copy of the body element
                cnt = deepcopy(chapter_child)
                self._fix_images(cnt, base_path)
                cnt = self._fix_content(cnt)

                if self.theme_plugin:
                    try:
                        cnt = self.theme_plugin.fix_content(cnt)
                    except NotImplementedError:
                        pass

                # TODO move it to more proper place in the future, and create plugin for it
                if self._bk_image_editor_conversion:
                    try:
                        cnt = self._bk_image_editor_conversion.convert(cnt)
                    except Exception:
                        # best effort: keep the unconverted content, but do
                        # not swallow KeyboardInterrupt/SystemExit
                        logger.exception("mpdf ImageEditorConversion failed")

                # NOTE(review): the slice drops the first 6 and last 9
                # characters, which presumably are "<body>" and "</body>"
                # plus trailing whitespace - a body tag with attributes
                # would not be stripped cleanly; confirm.
                return etree.tostring(cnt,
                                      method='html',
                                      encoding='utf-8',
                                      pretty_print=True)[6:-9]
        except etree.XMLSyntaxError:
            pass

        return u''

    def _fix_horrible_mpdf(self, content):
        content = content.replace('></columnbreak>', " />\n")
        content = content.replace('></columns>', " />\n")

        return content

    def _create_body(self, book):
        """Create body html file with main content of the book.

        Created html file will be used by booktype2mpdf.php script
        to create final PDF file.

        :Args:
          - book: EPUB Book object
        """

        def _toc(depth, toc_items):
            """Flattens the nested table of contents into a list of items."""
            items = []

            for toc_item in toc_items:
                if isinstance(toc_item[1], list):
                    section_title, chapters = toc_item

                    items.append({
                        'type': 'section',
                        'level': depth,
                        'title': section_title,
                        'url_title': booktype_slugify(section_title),
                    })
                    # chapters of a section are listed one level deeper
                    items.extend(_toc(depth + 1, chapters))
                else:
                    chapter_title, chapter_href = toc_item
                    chapter_item = book.get_item_with_href(chapter_href)
                    content = self._get_chapter_content(chapter_item)
                    content = self._fix_horrible_mpdf(content)

                    href_filename, _ = os.path.splitext(chapter_href)
                    items.append({
                        'type': 'chapter',
                        'level': depth,
                        'title': chapter_title,
                        'url_title': booktype_slugify(chapter_title),
                        'href': chapter_href,
                        'href_filename': href_filename,
                        'content': content
                    })

            return items

        book_toc = _toc(0, parse_toc_nav(book))

        data = self._get_data(book)
        data.update(self.get_extra_data(book))
        data.update({'book_items': book_toc})

        body_name = get_body(self.theme_name, self.name)
        html = render_to_string(body_name, data)

        html_path = os.path.join(self.sandbox_path, self._body_html_name)

        # codecs.open() always opens the file in binary mode, so the mode
        # must not contain "t"; the context manager closes the file even
        # when writing fails.
        with codecs.open(html_path, 'w', 'utf8') as body_file:
            body_file.write(html)

    def _write_style(self, book):
        """Creates style file.

        Style file will include default styling, theme styling and custom
        styling provided by the user, written in that order.

        Created style file will be used by booktype2mpdf.php script
        to create final PDF file. Nothing is written when the configuration
        has no ``settings`` entry.

        :Args:
          - book: EPUB Book object
        """

        if 'settings' not in self.config:
            return

        css_style = create_default_style(self.config, self.name,
                                         self.get_extra_style(book))
        theme_style = u''

        if self.theme_name != '':
            theme_style = read_theme_style(self.theme_name, self.name)

            try:
                if self.theme_name == 'custom':
                    # options of the custom theme are stored as JSON and
                    # merged into the configuration used as template context
                    custom = self.config['theme'].pop('custom', '{}')
                    custom = json.loads(custom.encode('utf-8'))
                    self.config.update(custom)

                # theme style is a Django template rendered with the config
                theme_style = Template(theme_style).render(
                    Context(self.config))
            except Exception:
                # best effort: keep the unrendered theme style, but do not
                # swallow KeyboardInterrupt/SystemExit
                logger.exception("Writing styles failed for `%s` theme.",
                                 self.theme_name)

        custom_style = self.config.get('settings', {}).get('styling', u'')

        # codecs.open() always opens the file in binary mode, so the mode
        # must not contain "t"; the context manager closes the file even
        # when writing fails.
        style_path = '{}/style.css'.format(self.sandbox_path)
        with codecs.open(style_path, 'w', 'utf8') as style_file:
            style_file.write(css_style)
            style_file.write(theme_style)
            style_file.write(custom_style)

    def _write_configuration(self, book):
        """Creates configuration file for booktype2mpdf.php script.

        Configuration file is read by the booktype2mpdf.php script. It is
        how we pass information to the PHP script which will finally
        create and format the PDF file.

        The file holds the book metadata, the converter configuration and
        everything returned by ``get_extra_configuration``.

        :Args:
          - book: EPUB Book object
        """

        dc_metadata = self.get_metadata(book)

        data = {'metadata': dc_metadata, 'config': self.config}
        data.update(self.get_extra_configuration())

        # codecs.open() always opens the file in binary mode, so the mode
        # must not contain "t"; the context manager closes the file even
        # when writing fails.
        config_path = '{}/config.json'.format(self.sandbox_path)
        with codecs.open(config_path, 'w', 'utf8') as config_file:
            config_file.write(unicode(json.dumps(data), 'utf8'))

    def _save_images(self, book):
        """Writes every image item of the EPUB file into the images directory.

        The directory is created first when it does not exist yet.

        :Args:
          - book: EPUB Book object
        """

        target_dir = self.images_path

        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        for image_item in book.get_items_of_type(ebooklib.ITEM_IMAGE):
            self._save_image(image_item)

    def _save_image(self, item):
        """Saves single image to the temporary library.

        :Args:
          - item: ebooklib item object
        """

        file_name = os.path.basename(item.file_name)
        file_path = os.path.join(self.images_path, file_name)

        if os.path.exists(file_path):
            file_name = '{}-{}'.format(item.id, file_name)
            file_path = os.path.join(self.images_path, file_name)

        with open(file_path, 'wb') as file:
            file.write(item.content)

        self.images[item.file_name] = file_name

    def _fix_columns(self, content):
        """Add mPDF tags for multi column support.

        Every ``div`` with the ``bk-columns`` class gets a ``columns``
        element in front of it. Unless it is also a ``bk-marker``, a second
        ``columns`` element resetting the count to one is placed after it.
        The ``div`` tag itself is then dropped. Column break ``div``
        elements are renamed to ``columnbreak``.

        :Args:
          - content: lxml node tree with the chapter content

        """
        columns_query = "//div[contains(@class, 'bk-columns')]"
        breaks_query = "//div[@class='bk-column-break']"

        for block in content.xpath(columns_query):
            opening_attrs = {
                'column-count': block.get('data-column', '3'),
                'vAlign': block.get('data-valign', ''),
                'column-gap': block.get('data-gap', '5')
            }
            opening = etree.Element('columns', opening_attrs)

            container = block.getparent()
            container.insert(container.index(block), opening)

            if 'bk-marker' not in block.get('class'):
                closing = etree.Element('columns', {'column-count': '1'})
                container.insert(container.index(block) + 1, closing)

            block.drop_tag()

        for break_div in content.xpath(breaks_query):
            break_div.tag = 'columnbreak'
            del break_div.attrib['class']

    def _fix_broken_links(self, content):
        """Removes links from the output and replaces them with textual url.

        :Args:
          - content: lxml node tree with the chapter content

        """

        for link in content.iter('a'):
            if link.attrib.get('href', '') != '':
                text = link.tail or ''
                link.tail = ' [' + link.attrib.get('href', '') + ']' + text
                link.tag = 'span'

    def _fix_content(self, content):
        """Removes unwanted formatting from the content.

        This function will remove links from the content and put
        URL link outside of the link.

        :Args:
          - content: lxml node tree with the chapter content

        :Returns:
          - Returns changed lxml node tree
        """

        if content is None:
            return content

        self._fix_broken_links(content)
        self._fix_columns(content)

        # Fix links to other URL places
        return content

    def _fix_images(self, root, base_path):
        """Points the images in the content to the saved image files.

        The ``src`` of every image is resolved against ``base_path`` and
        looked up in ``self.images``. Images which can not be found are
        logged and left as they are, apart from the removed style attribute.

        :Args:
          - root: lxml node tree with the content
          - base_path: directory of the chapter, used to resolve image paths
        """

        for image in root.iter('img'):
            raw_src = image.get('src', None)

            if raw_src is None:
                continue

            decoded_src = urllib2.unquote(raw_src)
            item_name = os.path.normpath(os.path.join(base_path, decoded_src))

            try:
                saved_name = self.images[item_name]
                image.set('src', self._images_dir + saved_name)
            except Exception as e:
                # make sure to delete style attribute to avoid mpdf gets broken
                # TODO: this should be handled from the image editor. Fix it later
                if 'transform' in image.get('style', ''):
                    del image.attrib['style']

                # TODO: discuss what to do in case of missing image

                logger.error('MPDF::_fix_images: image not found %s (%s)' %
                             (item_name, e))

    def _get_data(self, book):
        """Builds the default context for the front and end matter templates.

        It mainly has default metadata from the book. Header and footer are
        shown by default; when the configuration has ``settings``, they are
        shown only if the matching setting is ``'on'``.

        :Args:
          - book: EPUB Book object

        :Returns:
          - Dictionary with default data for the templates
        """

        if 'settings' in self.config:
            book_settings = self.config['settings']
            show_header = book_settings.get('show_header', '') == 'on'
            show_footer = book_settings.get('show_footer', '') == 'on'
        else:
            show_header = show_footer = True

        metadata = book.metadata

        return {
            "title": get_refines(metadata, 'title-type', 'main'),
            "subtitle": get_refines(metadata, 'title-type', 'subtitle'),
            "shorttitle": get_refines(metadata, 'title-type', 'short'),
            "author": get_refines(metadata, 'role', 'aut'),
            "publisher": get_metadata(metadata, 'publisher'),
            "isbn": get_metadata(metadata, 'identifier'),
            "language": get_metadata(metadata, 'language'),
            "dir": self.direction,
            "metadata": metadata,
            "show_header": show_header,
            "show_footer": show_footer
        }

    def _create_frontmatter(self, book):
        """Creates front matter file.

        Front matter HTML file will be used by booktype2mpdf.php script to create
        PDF file.

        :Args:
          - book: EPUB Book object
        """

        data = self._get_data(book)
        data.update(self.get_extra_data(book))

        frontmatter_name = get_single_frontmatter(self.theme_name, self.name)
        html = render_to_string(frontmatter_name, data)

        # codecs.open() always opens the file in binary mode, so the mode
        # must not contain "t"; the context manager closes the file even
        # when writing fails.
        frontmatter_path = '{}/frontmatter.html'.format(self.sandbox_path)
        with codecs.open(frontmatter_path, 'w', 'utf8') as frontmatter_file:
            frontmatter_file.write(html)

    def _create_endmatter(self, book):
        """Creates end matter file.

        End matter HTML file will be used by booktype2mpdf.php script to create
        PDF file.

        :Args:
          - book: EPUB Book object
        """

        data = self._get_data(book)
        data.update(self.get_extra_data(book))

        endmatter_name = get_single_endmatter(self.theme_name, self.name)
        html = render_to_string(endmatter_name, data)

        # codecs.open() always opens the file in binary mode, so the mode
        # must not contain "t"; the context manager closes the file even
        # when writing fails.
        endmatter_path = '{}/endmatter.html'.format(self.sandbox_path)
        with codecs.open(endmatter_path, 'w', 'utf8') as endmatter_file:
            endmatter_file.write(html)

    def _add_theme_assets(self, book):
        """Copy all the assets from the theme to the sandbox directory.

        Only the ``images`` and ``fonts`` assets are copied. Every asset is
        written, under its base name, to the ``assets`` directory of the
        sandbox. Copying is best effort: an asset which can not be written
        is logged and skipped.

        :Args:
          - book: EPUB book object (not referenced by this method)
        """

        assets = read_theme_assets(self.theme_name, self.name)

        def _write(name, content):
            """Writes one asset file into the sandbox assets directory."""
            try:
                os.makedirs('{}/assets/'.format(self.sandbox_path))
            except OSError:
                # Usually the directory exists already. Any other problem
                # shows up (and is logged) when the file is opened below.
                pass

            asset_path = '{}/assets/{}'.format(self.sandbox_path, name)

            # never write outside of the sandbox directory
            if not os.path.normpath(asset_path).startswith(
                    self.sandbox_path):
                return

            try:
                with open(asset_path, 'wb') as asset_file:
                    asset_file.write(content)
            except IOError as err:
                logger.warning('MPDF::_add_theme_assets: can not write %s (%s)',
                               asset_path, err)

        for asset_type, asset_list in assets.items():
            # images and fonts are handled the same way, the rest is ignored
            if asset_type not in ('images', 'fonts'):
                continue

            for asset_name in asset_list:
                name = os.path.basename(asset_name)
                content = read_theme_asset_content(self.theme_name,
                                                   asset_name)

                _write(name, content)

    def _run_renderer(self, html_path, pdf_path):
        """Runs the booktype2mpdf.php script which renders the PDF file.

        The script is started through the PHP binary with the mPDF directory,
        the sandbox directory and the output path as command line options.
        Its standard output is expected to be a JSON document.

        :Args:
          - html_path: path to the html file (not referenced by this method)
          - pdf_path: path to the output PDF file

        :Returns:
          Dictionary decoded from the script output, or an empty dictionary
          when running the command or decoding its output fails.
        """

        mpdf_dir = settings.MPDF_DIR
        php_binary = settings.PHP_PATH
        script_path = settings.MPDF_SCRIPT

        command = [
            php_binary,
            script_path,
            '--mpdf={}'.format(mpdf_dir),
            '--dir={}'.format(self.sandbox_path),
            '--output={}'.format(pdf_path),
        ]

        result = {}

        try:
            _, stdout, _stderr = utils.run_command(command)
            result = json.loads(stdout)
        except Exception as exc:
            # best effort: any failure is logged and reported as empty output
            logger.error(
                'MPDF Converter::Fail running the command "{}".'.format(exc))

        return result

    def _get_theme_mpdf_config(self):
        """
        Reads the info.json file of the current theme and returns the
        ``options`` defined for this converter's output profile.

        :Returns:
          Dictionary with additional mpdf options, empty when the theme
          defines nothing for this profile.
        """

        output_profile = self.name
        info_path = '{}/themes/{}/info.json'.format(
            settings.BOOKTYPE_ROOT, self.theme_name)
        theme_info = read_theme_info(info_path)

        if 'output' not in theme_info:
            return {}

        if output_profile not in theme_info['output']:
            return {}

        return theme_info['output'][output_profile].get('options', {})
Example #20
0
class Epub3Converter(BaseConverter):
    """Converter which copies the original book into a new EPUB book and
    writes it out with the EPUB3 writer."""

    name = 'epub3'
    verbose_name = _('EPUB3')
    support_section_settings = True

    toc_title = 'toc'
    # used when the configuration has no ``style`` entry
    default_style = 'style1'
    # used when neither the configuration nor the book metadata has a language
    default_lang = DEFAULT_LANG
    # class of the main writer plugin; must be set, see _get_writer_plugin_class
    writer_plugin_class = WriterPlugin
    css_dir = os.path.join(os.path.dirname(__file__), 'styles/')

    # suffix passed to plugin.load_theme_plugin when loading the theme plugin
    _theme_suffix = 'epub'
    # directory (relative to the sandbox) used to build ``images_path``
    _images_dir = 'images/'

    # valid extensions to assign right mimetype
    WOFF_FONTS = ['.woff']
    OPENTYPE_FONTS = ['.otf', '.otc', '.ttf', '.ttc']

    def __init__(self, *args, **kwargs):
        """Initialises the converter state on top of the base converter."""

        super(Epub3Converter, self).__init__(*args, **kwargs)

        # theme data stays empty until _init_theme_plugin() reads the config
        self.theme_name = ''
        self.theme_plugin = None

        # created in pre_convert()
        self._bk_image_editor_conversion = None

        self.images_path = os.path.join(self.sandbox_path, self._images_dir)

    def _get_theme_plugin(self):
        """Returns whatever the plugin loader finds for the current theme."""

        suffix, theme = self._theme_suffix, self.theme_name
        return plugin.load_theme_plugin(suffix, theme)

    def _init_theme_plugin(self):
        """Reads the theme from the configuration and loads its plugin.

        Without a ``theme`` entry in the configuration the theme name is
        set to None. The plugin is instantiated only when one is found.
        """

        if 'theme' not in self.config:
            self.theme_name = None
            return

        self.theme_name = self.config['theme'].get('id', '')
        plugin_class = self._get_theme_plugin()

        if plugin_class:
            self.theme_plugin = plugin_class(self)

    def pre_convert(self, original_book, book):
        """Called before the conversion starts.

        Runs the parent ``pre_convert``, then the theme plugin one (plugins
        which do not implement it are skipped) and creates the image editor
        conversion helper.

        :Args:
          - original_book: EPUB book object we are converting from
          - book: new EPUB book object
        """

        super(Epub3Converter, self).pre_convert(original_book)

        theme_plugin = self.theme_plugin

        if theme_plugin:
            try:
                theme_plugin.pre_convert(original_book, book)
            except NotImplementedError:
                pass

        # TODO move it to more proper place in the future, and create plugin for it
        self._bk_image_editor_conversion = ImageEditorConversion(
            original_book, EPUB_DOCUMENT_WIDTH, self)

    def post_convert(self, original_book, book, output_path):
        """Called once the new book is populated.

        Forwards the call to the theme plugin, when there is one. Plugins
        which do not implement ``post_convert`` are silently skipped.

        :Args:
          - original_book: EPUB book object we are converting from
          - book: new EPUB book object
          - output_path: file path to output file
        """

        if not self.theme_plugin:
            return

        try:
            self.theme_plugin.post_convert(original_book, book, output_path)
        except NotImplementedError:
            pass

    def convert(self, original_book, output_path):
        """Starts conversion process.

        Builds a new EPUB book out of the original one (metadata, items,
        navigation, cover and theme assets) and writes it to ``output_path``.

        :Args:
          - original_book: EPUB book object we are converting from
          - output_path: file path where the EPUB file will be written

        :Returns:
          Returns dictionary with file size of output file
        """

        started_at = datetime.datetime.now()
        class_name = self.__class__.__name__

        logger.debug('[EPUB] {}.convert'.format(class_name))

        self._init_theme_plugin()

        new_book = ebooklib.epub.EpubBook()
        new_book.FOLDER_NAME = 'OEBPS'

        self.pre_convert(original_book, new_book)

        new_book.uid = original_book.uid
        new_book.title = original_book.title

        # we should define better uri for this
        new_book.add_prefix('bkterms', 'http://booktype.org/')

        new_book.metadata = deepcopy(original_book.metadata)
        new_book.toc = []

        self.direction = self._get_dir(new_book)

        logger.debug('[EPUB] Edit metadata')
        self._edit_metadata(new_book)

        logger.debug('[EPUB] Copy items')
        self._copy_items(new_book, original_book)

        logger.debug('[EPUB] Make navigation')
        self._make_nav(new_book, original_book)

        logger.debug('[EPUB] Add cover')
        self._add_cover(new_book)

        if self.theme_name:
            self._add_theme_assets(new_book)

        self.post_convert(original_book, new_book, output_path)

        logger.debug('[EPUB] Setting writer plugins and options')
        writer_plugins = self._get_plugins(new_book, original_book)
        writer_options = {'plugins': writer_plugins}

        logger.debug('[EPUB] Writer')
        writer = self._get_writer_class()(output_path,
                                          new_book,
                                          options=writer_options)

        logger.debug('[EPUB] Process')
        writer.process()

        logger.debug('[EPUB] Write')
        writer.write()

        logger.debug('[END] {}.convert'.format(self.__class__.__name__))

        finished_at = datetime.datetime.now()
        logger.info('Conversion lasted %s.', finished_at - started_at)

        return {"size": os.path.getsize(output_path)}

    def _get_dir(self, epub_book):
        """Returns the text direction stored in the book metadata.

        Looks in the OPF metadata for an entry whose ``property`` attribute
        is ``bkterms:dir`` and returns its lower-cased value, ``'ltr'`` when
        no such entry is found.
        """

        opf_meta = epub_book.metadata[ebooklib.epub.NAMESPACES["OPF"]]

        matches = [
            entry for entry in opf_meta[None]
            if entry[1] and entry[1].get('property', '') == 'bkterms:dir'
        ]

        if matches and len(matches[0]) > 0:
            return matches[0][0].lower()

        return 'ltr'

    def _get_writer_plugin_class(self):
        """Returns the configured writer plugin class.

        Raises ImproperlyConfigured when ``writer_plugin_class`` is not set.
        """

        plugin_class = self.writer_plugin_class

        if not plugin_class:
            raise ImproperlyConfigured

        return plugin_class

    def _get_writer_plugin(self, epub_book, original_book):
        """Creates the writer plugin and fills in its default options
        (css, style, language and preview flag)."""

        plugin_instance = self._get_writer_plugin_class()()

        default_options = {
            'css': self._add_css_styles(epub_book),
            'style': self.config.get('style', self.default_style),
            'lang': self._get_language(original_book),
            'preview': self.config.get('preview', True)
        }
        plugin_instance.options.update(default_options)

        return plugin_instance

    def _get_plugins(self, epub_book, original_book):
        """Builds the list of plugins handed over to the writer instance.

        The list holds, in this order, the main writer plugin, the image
        editor writer plugin and the tags cleanup writer plugin.
        """

        plugins = [self._get_writer_plugin(epub_book, original_book)]
        plugins.append(ImageEditorWriterPlugin(converter=self))
        plugins.append(CleanupTagsWriterPlugin())

        return plugins

    def _get_writer_class(self):
        """Hook returning the writer class used by this converter."""

        writer_class = Epub3Writer
        return writer_class

    def _get_language(self, original_book):
        """
        Returns the language of the book. The ``lang`` entry of the
        configuration wins; otherwise the language from the book data is
        used and, when that is missing too, the class default language.
        """

        book_data = self._get_data(original_book)
        fallback = book_data.get('language', self.default_lang)

        return self.config.get('lang', fallback)

    def _edit_metadata(self, epub_book):
        """Modifies original metadata.

        Removes from the OPF metadata the existing ``dcterms:modified``
        entry and every `additional metadata` entry (``property`` starting
        with ``add_meta_terms:``), which here is just garbage.

        :Args:
          - epub_book: EPUB book object whose metadata is changed in place
        """

        opf_meta = epub_book.metadata[ebooklib.epub.NAMESPACES["OPF"]]

        def _keep(entry):
            attrs = entry[1]

            if not isinstance(attrs, dict):
                return True

            # entries without a ``property`` attribute are always kept; the
            # fallback avoids calling startswith() on None
            prop = attrs.get("property") or ""

            return (prop != "dcterms:modified" and
                    not prop.startswith("add_meta_terms:"))

        opf_meta[None] = [entry for entry in opf_meta[None] if _keep(entry)]

        # NOTE: probably going to extend this function in future

    def _make_nav(self, epub_book, original_book):
        """Creates navigational stuff (guide, ncx, nav) by copying the original.

        The table of contents of the original book is turned into sections
        and links of the new book. The existing cover and the empty sections
        are left out, and only the first chapter is added to the guide.
        """

        # number of chapters mapped so far
        self._num_of_text = 0

        def _to_nav(entry):
            """Maps one TOC entry to a (section, links) tuple or to a link."""
            if isinstance(entry[1], list):
                title, children = entry
                section = ebooklib.epub.Section(title)

                return (section, [_to_nav(child) for child in children])

            title, href = entry
            href = "{}/{}".format(DOCUMENTS_DIR, href)

            item = self.items_by_path[urlparse.urlparse(href).path]
            item.title = title

            is_first_text = not (self._num_of_text > 0)
            self._num_of_text += 1

            if is_first_text:
                epub_book.guide.append({
                    'type': 'text',
                    'href': href,
                    'title': title,
                })

            return ebooklib.epub.Link(href=href, title=title, uid=item.id)

        def _is_cover(entry):
            """Tells whether the TOC entry points to the existing cover."""
            if type(entry[1]) in (str, unicode):
                return os.path.basename(entry[1]) == COVER_FILE_NAME
            return False

        def _is_empty_section(nav_item):
            return isinstance(nav_item, tuple) and len(nav_item[1]) == 0

        source_toc = [
            entry for entry in parse_toc_nav(original_book)
            if not _is_cover(entry)
        ]
        nav_items = [_to_nav(entry) for entry in source_toc]

        # we don't allow empty sections just because epubcheck will
        # raise an error at the moment of evaluating the toc.ncx file
        epub_book.toc = [
            nav_item for nav_item in nav_items
            if not _is_empty_section(nav_item)
        ]

    def _copy_items(self, epub_book, original_book):
        """Populates the book by copying items from the original book.

        Cover items are skipped. Images, styles and documents are moved
        under their own directories. The nav document keeps its bare file
        name, goes first in the spine and is registered in the guide as
        'toc'. Every other document is appended to the spine and its content
        goes through the theme plugin and the image editor conversion. The
        NCX item is replaced with a brand new one.

        Every copied item is also indexed by file name in
        `self.items_by_path`.

        :Args:
          - epub_book: book being produced
          - original_book: book whose items are copied
        """

        def _rewrite_content(item, transform):
            # parses the item content, applies `transform` to the tree and
            # stores the serialized result back into the item
            tree = ebooklib.utils.parse_html_string(item.content)
            item.content = etree.tostring(transform(tree),
                                          method='html',
                                          encoding='utf-8',
                                          pretty_print=True)

        self.items_by_path = {}

        for orig_item in original_book.items:
            # do not copy cover, checked before the (costly) deepcopy
            if self._is_cover_item(orig_item):
                continue

            item = deepcopy(orig_item)
            item_type = item.get_type()
            file_name = os.path.basename(item.file_name)

            if item_type == ebooklib.ITEM_IMAGE:
                item.file_name = '{}/{}'.format(IMAGES_DIR, file_name)

            elif item_type == ebooklib.ITEM_STYLE:
                item.file_name = '{}/{}'.format(STYLES_DIR, file_name)

            elif item_type == ebooklib.ITEM_DOCUMENT:
                if isinstance(item, ebooklib.epub.EpubNav):
                    epub_book.spine.insert(0, item)
                    epub_book.guide.insert(
                        0, {
                            'type': 'toc',
                            'href': file_name,
                            'title': self.config.get('toc_title',
                                                     self.toc_title)
                        })

                    # the nav is not moved under the documents directory
                    item.file_name = file_name
                else:
                    item.file_name = '{}/{}'.format(DOCUMENTS_DIR, file_name)
                    epub_book.spine.append(item)

                    if self.theme_plugin:
                        try:
                            _rewrite_content(item,
                                             self.theme_plugin.fix_content)
                        except NotImplementedError:
                            # theme plugin does not fix the content
                            pass

                    # todo move it to more proper place in the future, and create plugin for it
                    if self._bk_image_editor_conversion:
                        try:
                            _rewrite_content(
                                item,
                                self._bk_image_editor_conversion.convert)
                        except Exception:
                            # best effort: the item keeps its content, but
                            # SystemExit/KeyboardInterrupt are not swallowed
                            logger.exception(
                                "epub ImageEditorConversion failed")

            if isinstance(item, ebooklib.epub.EpubNcx):
                item = ebooklib.epub.EpubNcx()

            epub_book.add_item(item)
            self.items_by_path[item.file_name] = item

    def _add_cover(self, epub_book):
        """Adds the cover image to the resulting EPUB when config has one.

        :Args:
          - epub_book: book that receives the cover
        """

        # nothing to do when no cover image was given in config
        if 'cover_image' not in self.config:
            return

        cover_asset = self.get_asset(self.config['cover_image'])
        language = self.config.get('lang', DEFAULT_LANG)

        add_cover(epub_book, cover_asset, language)

    def _get_theme_style(self):
        """Reads the style of the current theme.

        :Returns:
          - Whatever `read_theme_style` gives back for the theme name and
            theme suffix set to this converter
        """

        theme_style = read_theme_style(self.theme_name, self._theme_suffix)
        return theme_style

    def _get_default_style(self):
        """Renders the default style template for the current theme suffix.

        :Returns:
          - Result of rendering 'themes/style_<suffix>.css' with the text
            direction of the converter available as `dir`
        """

        template_name = 'themes/style_{}.css'.format(self._theme_suffix)
        context = {'dir': self.direction}

        return render_to_string(template_name, context)

    def _add_css_styles(self, epub_book):
        """Adds default css styles and custom css text if exists in config.

        Up to three stylesheets are added under the styles directory:
          - 'default.css': default style, skipped (and logged) on any failure
          - 'theme.css': theme style, only when a theme name is set; the
            'custom' theme is rendered as a template with the data found in
            config
          - 'custom_style.css': styling coming from publishing settings

        :Args:
          - epub_book: book that receives the stylesheets

        :Returns:
          - List with the names of the stylesheets which were added
        """

        book_css = []

        def _add_stylesheet(name, content):
            # registers one css item into the book and remembers its name
            epub_book.add_item(
                ebooklib.epub.EpubItem(uid=name,
                                       content=content,
                                       file_name='{}/{}'.format(
                                           STYLES_DIR, name),
                                       media_type='text/css'))
            book_css.append(name)

        try:
            _add_stylesheet('default.css', self._get_default_style())
        except Exception as e:
            logger.info('Default style was not added %s.', e)

        if self.theme_name:
            content = self._get_theme_style()

            if self.theme_name == 'custom':
                try:
                    data = json.loads(
                        self.config['theme']['custom'].encode('utf8'))

                    tmpl = Template(content)
                    ctx = Context(data)
                    content = tmpl.render(ctx)
                except Exception:
                    # best effort: the theme style is used as it was read,
                    # but SystemExit/KeyboardInterrupt are not swallowed
                    logger.exception("Fails with custom theme.")

            _add_stylesheet('theme.css', content)

        # we need to add css from publishing settings screen
        settings_style = self.config.get('settings', {}).get('styling', None)

        if settings_style:
            _add_stylesheet('custom_style.css', settings_style)

        return book_css

    def _get_theme_assets(self):
        """Reads the assets of the current theme.

        :Returns:
          - Whatever `read_theme_assets` gives back for the theme name and
            theme suffix set to this converter
        """

        theme_assets = read_theme_assets(self.theme_name, self._theme_suffix)
        return theme_assets

    def _add_theme_assets(self, epub_book):
        """Adds images and fonts of the current theme to the book.

        Assets whose content comes back empty are skipped, asset types other
        than 'images' and 'fonts' are ignored.

        :Args:
          - epub_book: book that receives the theme assets
        """

        for asset_type, asset_list in self._get_theme_assets().iteritems():
            if asset_type == 'images':
                for image_path in asset_list:
                    base_name = os.path.basename(image_path)
                    data = read_theme_asset_content(self.theme_name,
                                                    image_path)

                    if not data:
                        continue

                    image_item = ebooklib.epub.EpubImage()
                    image_item.file_name = "{}/{}".format(
                        IMAGES_DIR, base_name)
                    # short random suffix for the item id
                    image_item.id = 'theme_image_%s' % uuid.uuid4().hex[:5]
                    image_item.set_content(data)

                    epub_book.add_item(image_item)

            elif asset_type == 'fonts':
                for font_path in asset_list:
                    base_name = os.path.basename(font_path)
                    suffix = os.path.splitext(font_path)[-1].lower()
                    data = read_theme_asset_content(self.theme_name,
                                                    font_path)

                    if not data:
                        continue

                    font_item = ebooklib.epub.EpubItem()
                    font_item.file_name = "{}/{}".format(FONTS_DIR, base_name)
                    font_item.set_content(data)

                    # try to set the right font media type, other extensions
                    # keep whatever media type the item was created with
                    # http://www.idpf.org/epub/301/spec/epub-publications.html#sec-core-media-types
                    if suffix in self.OPENTYPE_FONTS:
                        font_item.media_type = 'application/vnd.ms-opentype'
                    elif suffix in self.WOFF_FONTS:
                        font_item.media_type = 'application/font-woff'

                    epub_book.add_item(font_item)

    def _get_data(self, book):
        """Collects default data for the front and end matter templates.

        The values are taken from the metadata of the book.

        :Args:
          - book: book object providing the `metadata` attribute

        :Returns:
          - Dictionary with default data for the templates
        """

        data = {}

        # titles and author come from refined metadata entries
        data["title"] = get_refines(book.metadata, 'title-type', 'main')
        data["subtitle"] = get_refines(book.metadata, 'title-type', 'subtitle')
        data["shorttitle"] = get_refines(book.metadata, 'title-type', 'short')
        data["author"] = get_refines(book.metadata, 'role', 'aut')

        # plain metadata entries
        data["publisher"] = get_metadata(book.metadata, 'publisher')
        data["isbn"] = get_metadata(book.metadata, 'identifier')
        data["language"] = get_metadata(book.metadata, 'language')

        # raw metadata is exposed as well
        data["metadata"] = book.metadata

        return data

    def _is_cover_item(self, item):
        """Tells whether the given item is a cover.

        :Args:
          - item: book item providing the `file_name` attribute

        :Returns:
          - True when the item is named 'cover.xhtml' or its exact type is
            one of the cover classes, False otherwise
        """

        base_name = os.path.basename(item.file_name)

        if base_name == 'cover.xhtml':
            return True

        # exact type match on purpose, subclasses are not taken as covers
        return type(item) in (ebooklib.epub.EpubCover,
                              ebooklib.epub.EpubCoverHtml)