c2.content += u'</body></html>' # add chapters to the book book.add_item(c1) book.add_item(c2) # create table of contents # - add manual link # - add section # - add auto created links to chapters book.toc = (( c1, c2, )) # add navigation files book.add_item(epub.EpubNcx()) book.add_item(epub.EpubNav()) # create spine book.spine = [ 'nav', c1, c2, ] # create epub file opts = {'plugins': [standard.SyntaxPlugin()]} epub.write_epub('test.epub', book, opts)
# Sample script: read the EPUB given as the first command line argument,
# print some debugging information about it and write it back out as
# 'test.epub' after running it through the standard plugins.
import sys

import ebooklib
from ebooklib import epub
from ebooklib.utils import debug

book = epub.read_epub(sys.argv[1])

# Dump the top level structures of the parsed book.
debug(book.metadata)
debug(book.spine)
debug(book.toc)

# Dump every image item found in the book.
# (Iterating over book.items and calling get_type() on each item is another
# way to inspect the content.)
for image_item in book.get_items_of_type(ebooklib.ITEM_IMAGE):
    debug(image_item)

# The plugins are imported only now, right before they are needed.
from ebooklib.plugins import standard, tidyhtml

write_plugins = [standard.SyntaxPlugin(), tidyhtml.TidyPlugin()]
epub.write_epub('test.epub', book, {'plugins': write_plugins})
class ExportBook(object):
    """
    Base booktype export book class

    If you want to customize export process:
     - create your own `ExportBook` class inside your booktype app
       (not in Booktype)
     - inherit your class from this class
       (from booktype.apps.export.utils import ExportBook as CoreExportBook)
     - check that your new class has name `ExportBook`
       (class ExportBook(CoreExportBook):)
     - define path in settings to module where your class located
       (BOOKTYPE_EXPORT_CLASS_MODULE = 'appname.module')

    The class level ``ATTRIBUTES_GLOBAL``, ``DEFAULT_PLUGINS`` and
    ``PREFIXES`` are defaults only. Every instance works on its own copies,
    so options passed to one instance never leak into another one.
    """

    ATTRIBUTES_GLOBAL = standard.ATTRIBUTES_GLOBAL + [
        'data-column',
        'data-gap',
        'data-valign',
        'data-id',
        'transform-data'
    ]
    DEFAULT_PLUGINS = [TidyPlugin(), standard.SyntaxPlugin()]
    PREFIXES = {
        'bkterms': 'http://booktype.org/',
        'add_meta_terms': 'http://booktype.org/additional-metadata/'
    }

    def __init__(self, filename, book_version, **kwargs):
        """
        :Args:
          - filename (:class:`str`): name of the epub file written by `run`
          - book_version: (:class:`booki.editor.models.BookVersion`)
            BookVersion instance

        :Kwargs:
          - remove_icejs: when true (default) `IceCleanPlugin` is appended
            to the plugins
          - remove_comments: when true (default) `CommentsCleanPlugin` is
            inserted as the first plugin
          - extra_plugins: additional plugins appended to the plugins
          - extra_attributes_global: additional global attributes
          - extra_prefixes: mapping of additional prefixes
        """

        self.filename = filename
        self.book_version = book_version
        self.kwargs = kwargs

        self.epub_book = None
        self.toc = None
        self.spine = None
        self.hold_chapters_urls = None
        self.embeded_images = {}
        self.attachments = models.Attachment.objects.filter(
            version=book_version)

        # Take per-instance copies of the class level defaults. Mutating the
        # class attributes directly would make plugins, attributes and
        # prefixes pile up with every new instance created in the process.
        self.ATTRIBUTES_GLOBAL = list(self.ATTRIBUTES_GLOBAL)
        self.DEFAULT_PLUGINS = list(self.DEFAULT_PLUGINS)
        self.PREFIXES = dict(self.PREFIXES)

        # ICEjs changes are removed by default, so to keep them in the epub
        # we need to pass remove_icejs as False in kwargs
        if kwargs.get('remove_icejs', True):
            self.DEFAULT_PLUGINS.append(IceCleanPlugin())

        # comments reference bubble should be removed by default for now
        # TODO: we should implement a way to attach the comments to the
        # raw epub file
        if kwargs.get('remove_comments', True):
            self.DEFAULT_PLUGINS.insert(0, CommentsCleanPlugin())

        # add extra plugins
        self.DEFAULT_PLUGINS.extend(kwargs.get('extra_plugins', []))

        # add extra attributes_global
        self.ATTRIBUTES_GLOBAL.extend(
            kwargs.get('extra_attributes_global', []))

        # add extra prefixes
        self.PREFIXES.update(kwargs.get('extra_prefixes', {}))

    def _set_metadata(self):
        """
        Set metadata to the epub book

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        self.epub_book = set_booktype_metada(self.epub_book,
                                             self.book_version.book)

    def _add_prefix(self):
        """
        Add prefixes

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        for k in self.PREFIXES:
            self.epub_book.add_prefix(k, self.PREFIXES[k])

    def _chapter_content_hook(self, content):
        """
        Access to chapter's content html before any other actions.

        :Args:
          - self (:class:`ExportBook`): current class instance
          - content (:class:`unicode`): chapter content html as unicode

        :Returns:
          Updated chapter's content
        """

        return content

    def _chapter_tree_hook(self, tree):
        """
        Access to chapter's content as lxml.html.HtmlElement instance.

        :Args:
          - self (:class:`ExportBook`): current class instance
          - tree (:class:`lxml.html.HtmlElement`): chapter content as
            lxml.html.HtmlElement instance
        """

        pass

    def _epub_chapter_hook(self, epub_chapter):
        """
        Access to epub chapter object.

        :Args:
          - self (:class:`ExportBook`): current class instance
          - epub_chapter (:class:`ebooklib.epub.EpubHtml`): epub chapter
            instance
        """

        pass

    @staticmethod
    def _is_wrapped_in_div(elem, css_class):
        """
        Check if the parent of the element is a div with the given class.

        :Args:
          - elem (:class:`lxml.html.HtmlElement`): element to check
          - css_class (:class:`str`): class name expected on parent div

        :Returns:
          True if the direct parent is a div carrying `css_class`
        """

        parent = elem.getparent()
        return (parent.tag == 'div' and
                css_class in parent.attrib.get('class', '').split())

    def _handle_chapter_element(self, elem):
        """
        Access to separate element from chapter's etree.

        Links starting with '../' are rewritten to point to the .xhtml
        file of the chapter (or dropped when the chapter is on hold),
        images are wrapped in div.group_img > div.image and registered as
        embedded, orphan endnotes are removed from the endnotes list.

        :Args:
          - self (:class:`ExportBook`): current class instance
          - elem (:class:`elem.lxml.html.HtmlElement`): element from
            chapter's etree
        """

        # handle links
        if elem.tag == 'a':
            href = elem.get('href')

            if href and href.startswith('../'):
                urlp = urlparse.urlparse(href)

                # strip leading '../' and the last character of the path
                # (presumably a trailing slash - verify against the editor)
                url_title = urlp.path[3:-1]

                # if link on chapter on hold -> remove tag
                if url_title not in self.hold_chapters_urls:
                    fixed_href = url_title + '.xhtml'

                    if urlp.fragment:
                        fixed_href = "{}#{}".format(fixed_href,
                                                    urlp.fragment)

                    elem.set('href', fixed_href)
                else:
                    elem.drop_tag()

        # handle images
        if elem.tag == 'img':
            if not self._is_wrapped_in_div(elem, 'image'):
                image_div = etree.Element('div', {'class': 'image'})
                elem.addprevious(image_div)
                image_div.insert(0, elem)

            image_div = elem.getparent()

            if not self._is_wrapped_in_div(image_div, 'group_img'):
                group_img = etree.Element('div', {'class': 'group_img'})
                image_div.addprevious(group_img)
                group_img.insert(0, image_div)

            src = elem.get('src')

            if src:
                # first 7 characters are replaced with 'static/'
                # (assumes src already has a 7 character prefix such as
                # 'static/' - TODO confirm)
                elem.set('src', 'static/' + src[7:])
                self.embeded_images[src] = True

        # remove endnotes without reference
        if elem.tag == 'ol' and elem.get('class') == 'endnotes':
            # './/' keeps the search inside this list; a leading '//' is
            # evaluated against the whole document in lxml
            for li in elem.xpath(".//li[@class='orphan-endnote']"):
                li.drop_tree()

    def _create_epub_images(self):
        """
        Create epub image objects

        Only attachments registered in `embeded_images` while processing
        the chapters are added. Attachments which can not be read are
        silently skipped.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        for attachment in self.attachments:
            attachment_name = attachment.attachment.name
            static_name = 'static/' + os.path.basename(attachment_name)

            if static_name not in self.embeded_images:
                continue

            try:
                # context manager closes the file even if reading fails
                with open(attachment_name, "rb") as f:
                    blob = f.read()
            except (IOError, OSError):
                continue

            filename = os.path.basename(attachment_name.encode("utf-8"))
            itm = epub.EpubImage()
            itm.file_name = 'static/%s' % filename
            itm.content = blob
            self.epub_book.add_item(itm)

    def _create_toc(self):
        """
        Create table of contents

        Fills `self.toc`, `self.spine` and `self.hold_chapters_urls` and
        adds every successfully parsed chapter to the epub book. Chapters
        whose content can not be parsed are logged and skipped.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        self.toc = OrderedDict()
        self.spine = ['nav']

        self.hold_chapters_urls = [
            i.url_title for i in self.book_version.get_hold_chapters()
        ]

        for chapter in self.book_version.get_toc():
            if chapter.chapter:
                c1 = epub.EpubHtml(
                    title=chapter.chapter.title,
                    file_name='%s.xhtml' % (chapter.chapter.url_title, )
                )

                # hook for some extra customizations
                cont = self._chapter_content_hook(chapter.chapter.content)

                try:
                    tree = parse_html_string(cont.encode('utf-8'))
                except Exception as err:
                    logger.error('Error parsing chapter content %s', err)
                    continue

                # hook for some extra customizations
                self._chapter_tree_hook(tree)

                for elem in tree.iter():
                    self._handle_chapter_element(elem)

                c1.content = etree.tostring(tree,
                                            pretty_print=True,
                                            encoding='utf-8',
                                            xml_declaration=True)

                # hook for some extra customizations
                self._epub_chapter_hook(c1)

                self.epub_book.add_item(c1)
                self.spine.append(c1)

                if chapter.parent:
                    self.toc[chapter.parent.id][1].append(c1)
                elif chapter.has_children():
                    self.toc[chapter.id] = [c1, []]
                else:
                    self.toc[chapter.id] = c1
            else:
                epub_sec = epub.Section(chapter.name)

                if chapter.parent:
                    self.toc[chapter.parent.id][1].append(epub_sec)
                else:
                    self.toc[chapter.id] = [epub_sec, []]

    def _set_sections_settings(self):
        """
        Stores the sections settings inside the book metadata that would
        be used by converter scripts. Using metadata give us the advantage
        of being still generating a valid epub in case these settings
        are not removed.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        from booktype.apps.convert.plugin import SectionsSettingsPlugin

        settings = {}
        count = 1

        for item in self.book_version.get_toc():
            if item.is_section() and item.has_children() and item.settings:
                key = SectionsSettingsPlugin.build_section_key(
                    item.name, count)
                settings[key] = item.settings
                count += 1

        self.epub_book.add_metadata(
            None, 'meta', json.dumps(settings),
            {'property': 'bkterms:sections_settings'})

    def run(self):
        """
        Run export process. Write epub file.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        self.epub_book = ExportEpubBook()

        self._set_metadata()
        self._add_prefix()
        self._create_toc()
        self._create_epub_images()
        self._set_sections_settings()

        self.epub_book.toc = self.toc.values()
        self.epub_book.spine = self.spine

        self.epub_book.add_item(epub.EpubNcx())
        self.epub_book.add_item(epub.EpubNav())

        # the module level setting of the standard plugin is replaced with
        # the attributes of this export before the book is written
        standard.ATTRIBUTES_GLOBAL = self.ATTRIBUTES_GLOBAL

        epub.write_epub(self.filename, self.epub_book,
                        {'plugins': self.DEFAULT_PLUGINS})
def _append_toc_section(toc, section_name, chapters):
    """Append collected chapters to the table of contents, if there are any.

    Chapters collected before the first section are appended as a plain
    list, chapters of a named section as ``(epub.Section, [chapters])``.
    """

    if not chapters:
        return

    if section_name is None:
        toc.append(chapters)
    else:
        toc.append((epub.Section(section_name), chapters[:]))


def export_book(input_file, filename):
    """Reads content of book in Booki.zip format and converts it to EPUB format.

    This function reads content of the book in Booki.zip file, creates new
    book in EPUB format and converts entire content into it. There are some
    things which are different in new EPUB format. One of them is how links
    and interlinks are handled.

    Chapters which can not be parsed are skipped entirely: they are added
    neither to the book nor to its table of contents.

    :Args:
      - input_file: Booki.zip source, passed to `BookiZip`
      - filename: name of the EPUB file to write
    """

    epub_book = ExportEpubBook()

    # Creating new EPUB file
    epub_book.add_prefix('bkterms', 'http://booktype.org/')

    # Read old Booki.zip format
    bookizip = BookiZip(input_file)

    _toc, _section, _section_name = [], [], None
    spine = ['nav']

    # Get filenames of all the chapters/sections, without the first 6 and
    # the last 5 characters (presumably a directory prefix and the '.html'
    # extension - verify against BookiZip)
    file_names = set(
        file_name[6:-5] for _, file_name, _ in bookizip.get_toc())

    for typ, file_name, title in bookizip.get_toc():
        # A section entry only closes the previous section and starts a
        # new one, it has no content of its own
        if typ == 1:
            _append_toc_section(_toc, _section_name, _section)

            _section_name = title
            _section = []
            continue

        # Create new chapter with new filename
        c1 = epub.EpubHtml(
            title=title,
            file_name='{}.xhtml'.format(file_name[6:-5])
        )
        cont = unicode(bookizip.read(file_name), 'utf-8')

        try:
            tree = parse_html_string(cont.encode('utf-8'))
        except Exception:
            # Just ignore everything if we can not parse the chapter
            continue

        # Register the chapter in the table of contents only after it has
        # been parsed, otherwise the TOC would point to a chapter which is
        # never added to the book
        _section.append(c1)

        # Change all the links in the document
        for elem in tree.iter():
            if elem.tag != 'a':
                continue

            href = elem.get('href')
            if not href:
                continue

            urlp = urlparse.urlparse(href)
            url_title = urlp.path

            # links with a scheme (http, mailto, ...) are left untouched
            if urlp.scheme != '':
                continue

            if url_title and url_title in file_names:
                fixed_href = url_title + '.xhtml'

                if urlp.fragment:
                    fixed_href = "{}#{}".format(fixed_href, urlp.fragment)

                elem.set('href', fixed_href)
            else:
                # Original note: this removes everything that is external,
                # which should not be happening.
                elem.drop_tag()

        c1.content = etree.tostring(tree,
                                    pretty_print=True,
                                    encoding='utf-8',
                                    xml_declaration=True)

        epub_book.add_item(c1)
        spine.append(c1)

    # Close the last open section
    _append_toc_section(_toc, _section_name, _section)

    # Add all of the attachments
    for att_name in bookizip.get_attachments():
        try:
            blob = bookizip.read(att_name)
        except (IOError, OSError):
            continue
        else:
            itm = epub.EpubImage()
            itm.file_name = att_name
            itm.content = blob
            epub_book.add_item(itm)

    # Placeholder metadata
    epub_book.set_title('Title', 'main')
    epub_book.set_language('en')
    epub_book.add_author('Author', role='aut', uid='author')

    epub_book.toc = _toc
    epub_book.spine = spine

    epub_book.add_item(epub.EpubNcx())
    epub_book.add_item(epub.EpubNav())

    opts = {'plugins': [TidyPlugin(), standard.SyntaxPlugin()]}
    epub.write_epub(filename, epub_book, opts)