예제 #1
0
    c2.content += u'</body></html>'

    # add chapters to the book
    book.add_item(c1)
    book.add_item(c2)

    # create table of contents
    # - add manual link
    # - add section
    # - add auto created links to chapters

    book.toc = ((
        c1,
        c2,
    ))

    # add navigation files
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # create spine
    book.spine = [
        'nav',
        c1,
        c2,
    ]

    # create epub file
    opts = {'plugins': [standard.SyntaxPlugin()]}
    epub.write_epub('test.epub', book, opts)
import sys

import ebooklib
from ebooklib import epub
from ebooklib.utils import debug

# Load the EPUB whose path is given as the first command-line argument.
book = epub.read_epub(sys.argv[1])

# Pass the book-level structures to debug(), one after another.
for attr_name in ('metadata', 'spine', 'toc'):
    debug(getattr(book, attr_name))

# To inspect item types instead, iterate over book.items and pass
# it.get_type() to debug() for each item.

# Pass every image item of the book to debug().
for image_item in book.get_items_of_type(ebooklib.ITEM_IMAGE):
    debug(image_item)

from ebooklib.plugins import standard, tidyhtml

# Plugins handed to the writer, in this order.
plugins = [standard.SyntaxPlugin(), tidyhtml.TidyPlugin()]
opts = dict(plugins=plugins)

# Write the book back out under a fixed file name.
epub.write_epub('test.epub', book, opts)
예제 #3
0
class ExportBook(object):
    """
    Base booktype export book class

    Builds an epub book object from a book version (metadata, prefixes,
    chapters, images, table of contents, sections settings) and writes it
    to `filename` when :meth:`run` is called.

    If you want to customize export process:
     - create your own `ExportBook` class inside your booktype app (not in Booktype)
     - inherit your class from this class
       (from booktype.apps.export.utils import ExportBook as CoreExportBook)
     - check that your new class has name `ExportBook`
       (class ExportBook(CoreExportBook):)
     - define path in settings to module where your class located
       (BOOKTYPE_EXPORT_CLASS_MODULE = 'appname.module')
    """

    # Class-level defaults. `__init__` copies them per instance before
    # applying kwargs, so these objects are never mutated at runtime.
    ATTRIBUTES_GLOBAL = standard.ATTRIBUTES_GLOBAL + [
        'data-column', 'data-gap', 'data-valign', 'data-id', 'transform-data'
    ]
    DEFAULT_PLUGINS = [TidyPlugin(), standard.SyntaxPlugin()]
    PREFIXES = {
        'bkterms': 'http://booktype.org/',
        'add_meta_terms': 'http://booktype.org/additional-metadata/'
    }

    def __init__(self, filename, book_version, **kwargs):
        """
        :Args:
          - filename (:class:`str`): First argument
          - book_version: (:class:`booki.editor.models.BookVersion`) BookVersion instance
          - kwargs: optional settings read here:
              * remove_icejs (default True): append `IceCleanPlugin`
              * remove_comments (default True): prepend `CommentsCleanPlugin`
              * extra_plugins: plugins appended to the plugin list
              * extra_attributes_global: attributes appended to `ATTRIBUTES_GLOBAL`
              * extra_prefixes: mapping merged into `PREFIXES`
        """
        self.filename = filename
        self.book_version = book_version
        self.kwargs = kwargs

        self.epub_book = None
        self.toc = None
        self.spine = None
        self.hold_chapters_urls = None
        self.embeded_images = {}
        self.attachments = models.Attachment.objects.filter(
            version=book_version)

        # Take per-instance copies of the class-level defaults before
        # customizing them. Mutating the class attributes in place would leak
        # the changes into every later instance (and into subclasses), e.g.
        # one more IceCleanPlugin for every export performed by the process.
        self.DEFAULT_PLUGINS = list(self.DEFAULT_PLUGINS)
        self.ATTRIBUTES_GLOBAL = list(self.ATTRIBUTES_GLOBAL)
        self.PREFIXES = dict(self.PREFIXES)

        # ICEjs changes are removed by default, so to keep them in the epub
        # we need to pass remove_icejs as False in kwargs
        if kwargs.get('remove_icejs', True):
            self.DEFAULT_PLUGINS.append(IceCleanPlugin())

        # comments reference bubble should be removed by default for now
        # TODO: we should implement a way to attach the comments to the raw epub file
        if kwargs.get('remove_comments', True):
            self.DEFAULT_PLUGINS.insert(0, CommentsCleanPlugin())

        # add extra plugins
        self.DEFAULT_PLUGINS += kwargs.get('extra_plugins', [])

        # add extra attributes_global
        self.ATTRIBUTES_GLOBAL += kwargs.get('extra_attributes_global', [])

        # add extra prefixes
        self.PREFIXES.update(kwargs.get('extra_prefixes', dict()))

    def _set_metadata(self):
        """
        Set metadata to the epub book

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        self.epub_book = set_booktype_metada(self.epub_book,
                                             self.book_version.book)

    def _add_prefix(self):
        """
        Add prefixes

        Registers every entry of `self.PREFIXES` on the epub book.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        for k in self.PREFIXES:
            self.epub_book.add_prefix(k, self.PREFIXES[k])

    def _chapter_content_hook(self, content):
        """
        Access to chapter's content html before any other actions.

        The base implementation returns the content unchanged.

        :Args:
          - self (:class:`ExportBook`): current class instance
          - content (:class:`unicode`): chapter content html as unicode

        :Returns:
          Updated chapter's content
        """

        return content

    def _chapter_tree_hook(self, tree):
        """
        Access to chapter's content as lxml.html.HtmlElement instance.

        The base implementation does nothing.

        :Args:
          - self (:class:`ExportBook`): current class instance
          - tree (:class:`lxml.html.HtmlElement`): chapter content as lxml.html.HtmlElement instance
        """

        pass

    def _epub_chapter_hook(self, epub_chapter):
        """
        Access to epub chapter object.

        The base implementation does nothing.

        :Args:
          - self (:class:`ExportBook`): current class instance
          - epub_chapter (:class:`ebooklib.epub.EpubHtml`): epub chapter instance
        """

        pass

    def _handle_chapter_element(self, elem):
        """
        Access to separate element from chapter's etree.

        Rewrites relative chapter links, wraps images into
        `div.group_img > div.image`, records used image sources in
        `self.embeded_images` and removes orphan endnotes.

        :Args:
          - self (:class:`ExportBook`): current class instance
          - elem (:class:`elem.lxml.html.HtmlElement`): element from chapter's etree
        """

        # handle links
        if elem.tag == 'a':
            href = elem.get('href')
            if href and href.startswith('../'):
                urlp = urlparse.urlparse(href)

                # strip the leading '../' and the last character of the path
                # (presumably a trailing '/' -- confirm against the editor's
                # link format)
                url_title = urlp.path[3:-1]

                # if link on chapter on hold -> remove tag
                if url_title not in self.hold_chapters_urls:
                    fixed_href = url_title + '.xhtml'
                    if urlp.fragment:
                        fixed_href = "{}#{}".format(fixed_href, urlp.fragment)
                    elem.set('href', fixed_href)
                else:
                    elem.drop_tag()

        # handle images
        if elem.tag == 'img':

            # make sure the image sits inside <div class="image">
            if elem.getparent().tag != 'div' or \
                    'class' not in elem.getparent().attrib or \
                    'image' not in elem.getparent().attrib['class'].split():
                image_div = etree.Element('div', {'class': 'image'})
                elem.addprevious(image_div)
                image_div.insert(0, elem)

            image_div = elem.getparent()

            # make sure that div sits inside <div class="group_img">
            if image_div.getparent().tag != 'div' or \
                    'class' not in image_div.getparent().attrib or \
                    'group_img' not in image_div.getparent().attrib['class'].split():
                group_img = etree.Element('div', {'class': 'group_img'})
                image_div.addprevious(group_img)
                group_img.insert(0, image_div)

            src = elem.get('src')

            if src:
                # replace the first 7 characters of src with 'static/'
                # (assumes src already starts with a 7-character prefix such
                # as 'static/' -- TODO confirm)
                elem.set('src', 'static/' + src[7:])
                self.embeded_images[src] = True

        # remove endnotes without reference
        if elem.tag == 'ol' and elem.get('class') == 'endnotes':
            # './/' keeps the search inside this <ol>; a leading '//' is
            # evaluated from the document root and would match list items
            # anywhere in the chapter.
            for li in elem.xpath(".//li[@class='orphan-endnote']"):
                li.drop_tree()

    def _create_epub_images(self):
        """
        Create epub image objects

        Only attachments whose `static/<basename>` key was recorded in
        `self.embeded_images` are added; unreadable files are skipped.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        for attachment in self.attachments:
            if ('static/' + os.path.basename(
                    attachment.attachment.name)) not in self.embeded_images:
                continue

            try:
                # context manager closes the file even if read() fails
                with open(attachment.attachment.name, "rb") as f:
                    blob = f.read()
            except (IOError, OSError):
                continue

            filename = os.path.basename(
                attachment.attachment.name.encode("utf-8"))
            itm = epub.EpubImage()
            itm.file_name = 'static/%s' % filename
            itm.content = blob
            self.epub_book.add_item(itm)

    def _create_toc(self):
        """
        Create table of contents

        Fills `self.toc` (ordered mapping of toc item id to epub toc entry),
        `self.spine` and `self.hold_chapters_urls`, and adds every chapter
        that could be parsed to the epub book.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        self.toc = OrderedDict()
        self.spine = ['nav']

        self.hold_chapters_urls = [
            i.url_title for i in self.book_version.get_hold_chapters()
        ]

        for chapter in self.book_version.get_toc():
            if chapter.chapter:
                c1 = epub.EpubHtml(title=chapter.chapter.title,
                                   file_name='%s.xhtml' %
                                   (chapter.chapter.url_title, ))

                # hook for some extra customizations
                cont = self._chapter_content_hook(chapter.chapter.content)

                try:
                    tree = parse_html_string(cont.encode('utf-8'))
                except Exception as err:
                    # unparsable chapters are logged and left out of the book
                    logger.error('Error parsing chapter content %s', err)
                    continue

                # hook for some extra customizations
                self._chapter_tree_hook(tree)

                for elem in tree.iter():
                    self._handle_chapter_element(elem)

                c1.content = etree.tostring(tree,
                                            pretty_print=True,
                                            encoding='utf-8',
                                            xml_declaration=True)

                # hook for some extra customizations
                self._epub_chapter_hook(c1)

                self.epub_book.add_item(c1)
                self.spine.append(c1)

                if chapter.parent:
                    # nested entry: goes into the children list of its parent
                    self.toc[chapter.parent.id][1].append(c1)
                else:
                    if chapter.has_children():
                        self.toc[chapter.id] = [c1, []]
                    else:
                        self.toc[chapter.id] = c1
            else:
                # toc item without a chapter is exported as a section
                epub_sec = epub.Section(chapter.name)

                if chapter.parent:
                    self.toc[chapter.parent.id][1].append(epub_sec)
                else:
                    self.toc[chapter.id] = [epub_sec, []]

    def _set_sections_settings(self):
        """
        Stores the sections settings inside the book metadata that would be
        used by converter scripts. Using metadata give us the advantage of being
        still generating a valid epub in case these settings are not removed.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        from booktype.apps.convert.plugin import SectionsSettingsPlugin

        settings = {}
        # running number of sections that carry settings; part of the key
        count = 1
        for item in self.book_version.get_toc():
            if item.is_section() and item.has_children() and item.settings:
                key = SectionsSettingsPlugin.build_section_key(
                    item.name, count)
                settings[key] = item.settings
                count += 1

        self.epub_book.add_metadata(None, 'meta', json.dumps(settings),
                                    {'property': 'bkterms:sections_settings'})

    def run(self):
        """
        Run export process.
        Write epub file.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """
        self.epub_book = ExportEpubBook()

        self._set_metadata()
        self._add_prefix()
        self._create_toc()
        self._create_epub_images()
        self._set_sections_settings()

        self.epub_book.toc = self.toc.values()
        self.epub_book.spine = self.spine
        self.epub_book.add_item(epub.EpubNcx())
        self.epub_book.add_item(epub.EpubNav())

        # NOTE: rebinds a module-level attribute of `standard`, so the value
        # stays in effect for the whole process after this call.
        standard.ATTRIBUTES_GLOBAL = self.ATTRIBUTES_GLOBAL

        epub.write_epub(self.filename, self.epub_book,
                        {'plugins': self.DEFAULT_PLUGINS})
예제 #4
0
파일: views.py 프로젝트: zeuser/Booktype
def export_book(input_file, filename):
    """Reads content of book in Booki.zip format and converts it to EPUB format.

    This function reads content of the book in Booki.zip file, creates new
    book in EPUB format and converts entire content into it. There are some
    things which are different in new EPUB format. One of them is how links
    and interlinks are handled.

    :Args:
      - input_file: source handed to `BookiZip`
      - filename: destination handed to `epub.write_epub`
    """

    def _flush_section(toc, section_name, chapters):
        """Append the collected chapters to toc; empty groups are dropped."""
        if not chapters:
            return

        if section_name is None:
            # chapters collected before the first section are appended as a
            # plain list
            toc.append(chapters)
        else:
            toc.append((epub.Section(section_name), chapters[:]))

    epub_book = ExportEpubBook()

    # Creating new EPUB file
    epub_book.add_prefix('bkterms', 'http://booktype.org/')

    # Read old Booki.zip format
    bookizip = BookiZip(input_file)

    _toc, _section, _section_name = [], [], None
    spine = ['nav']

    # Get filenames of all the chapters/sections. The slice drops the first
    # 6 and the last 5 characters (presumably a directory prefix and the
    # '.html' extension -- confirm against BookiZip). A set gives constant
    # time membership checks in the link loop below.
    file_names = {file_name[6:-5] for _, file_name, _ in bookizip.get_toc()}

    for typ, file_name, title in bookizip.get_toc():
        # A section entry closes the group collected so far and starts a
        # new one named after the section
        if typ == 1:
            _flush_section(_toc, _section_name, _section)

            _section_name = title
            _section = []
            continue

        # Create new chapter with new filename
        c1 = epub.EpubHtml(title=title,
                           file_name='{}.xhtml'.format(file_name[6:-5]))
        cont = unicode(bookizip.read(file_name), 'utf-8')

        try:
            tree = parse_html_string(cont.encode('utf-8'))
        except Exception:
            # Just ignore everything if we can not parse the chapter.
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            continue

        # Register the chapter in the table of contents only once it parsed,
        # so the TOC never points at a chapter missing from the book.
        _section.append(c1)

        # Change all the links in the document
        for elem in tree.iter():
            if elem.tag == 'a':
                href = elem.get('href')

                if href:
                    urlp = urlparse.urlparse(href)
                    url_title = urlp.path

                    if urlp.scheme == '':
                        if url_title and url_title in file_names:
                            fixed_href = url_title + '.xhtml'
                            if urlp.fragment:
                                fixed_href = "{}#{}".format(
                                    fixed_href, urlp.fragment)

                            elem.set('href', fixed_href)
                        else:
                            # NOTE: this removes every scheme-less link that
                            # does not point at a known chapter. That should
                            # not be happening.
                            elem.drop_tag()

        # Serialize once, after all links were rewritten
        c1.content = etree.tostring(tree,
                                    pretty_print=True,
                                    encoding='utf-8',
                                    xml_declaration=True)

        epub_book.add_item(c1)
        spine.append(c1)

    # Close the last open group
    _flush_section(_toc, _section_name, _section)

    # Add all of the attachments
    for att_name in bookizip.get_attachments():
        try:
            blob = bookizip.read(att_name)
        except (IOError, OSError):
            continue
        else:
            itm = epub.EpubImage()
            itm.file_name = att_name
            itm.content = blob
            epub_book.add_item(itm)

    # Placeholder metadata
    epub_book.set_title('Title', 'main')
    epub_book.set_language('en')
    epub_book.add_author('Author', role='aut', uid='author')

    epub_book.toc = _toc
    epub_book.spine = spine

    epub_book.add_item(epub.EpubNcx())
    epub_book.add_item(epub.EpubNav())

    opts = {'plugins': [TidyPlugin(), standard.SyntaxPlugin()]}
    epub.write_epub(filename, epub_book, opts)