def build_book(articles, filename):
    """Assemble already-built article items into an EPUB and write it out.

    Args:
        articles: Iterable of EPUB items; each one is added to the book and
            appears in the table of contents and the spine in iteration order.
        filename: Destination path handed to ``epub.write_epub``.
    """
    book = epub.EpubBook()
    book.set_identifier("pinkindbook")
    book.set_title('Pinkind Article Collection')
    book.set_language('en')
    book.add_author('pinkind.dbalan.in')

    # Build the stylesheet item exactly once before registering it.
    style = 'body { font-family: Times, Times New Roman, serif; }'
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    book.add_item(nav_css)

    spine = []
    for art in articles:
        book.add_item(art)
        spine.append(art)

    book.toc = spine
    # The navigation document goes first, followed by the articles.
    book.spine = ['nav'] + spine

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    epub.write_epub(filename, book)
def __init__(self, identifier, title, contributor, series_title):
    """Create the EPUB book skeleton: metadata, optional series info, stylesheets.

    Args:
        identifier: Value passed to ``set_identifier``.
        title: Book title.
        contributor: Name recorded both as DC contributor and as author.
        series_title: Series name. When it is empty or ``None`` no
            ``belongs-to-collection`` metadata is written.
    """
    self.__identifier = identifier
    self.__title = title
    self.__contributor = contributor
    self.__series_title = series_title

    self.book = epub.EpubBook()
    self.book.FOLDER_NAME = r"OEBPS"
    self.book.set_identifier(self.__identifier)
    self.book.set_title(self.__title)
    self.book.set_language('ja')
    self.book.set_direction('rtl')
    self.book.add_metadata('DC', 'contributor', self.__contributor)
    self.book.add_metadata('DC', 'rights',
                           "All rights reserved by the contributor")
    self.book.add_author(self.__contributor)

    # Truthiness instead of len(): a None series title means "no series"
    # rather than a TypeError.
    if self.__series_title:
        self.book.add_metadata(
            None, 'meta', self.__series_title,
            OrderedDict([('property', 'belongs-to-collection'),
                         ('id', 'series_id')]))
        # Refines the entry above (id "series_id") with its collection type.
        self.book.add_metadata(
            None, 'meta', r"series",
            OrderedDict([('refines', '#series_id'),
                         ('property', 'collection-type')]))

    self.tocs = []

    currentPath = os.path.dirname(os.path.abspath(__file__))

    def _load_stylesheet(uid, file_name):
        # Read assets/<file_name> next to this module, wrap it in an
        # EpubItem and register it on the book.
        css_path = os.path.abspath(
            os.path.join(currentPath, "assets", file_name))
        item = epub.EpubItem(uid=uid,
                             file_name=file_name,
                             media_type="text/css",
                             content=TextFileManager(css_path).load())
        self.book.add_item(item)
        return item

    self.default_css = _load_stylesheet("style", "stylesheet.css")
    self.cover_css = _load_stylesheet("style_cover", "cover.css")
def make_epub(id, title, lang, author, cover_path, subtitle):
    """Build a single-chapter EPUB and write it to ``./epub/<id>.epub``.

    Args:
        id: Suffix for the book identifier (``'ted_' + id``) and the output
            file name.
        title: Book title.
        lang: Language code used for the book and the chapter.
        author: Author name.
        cover_path: Path of the image file embedded as ``cover.jpg``.
        subtitle: HTML fragment placed inside the chapter ``<body>``.

    Note:
        The ``./epub/`` directory is not created here.
    """
    book = epub.EpubBook()

    # add metadata
    book.set_identifier('ted_' + id)
    book.set_title(title)
    book.set_language(lang)
    book.add_author(author)

    # add cover image; the context manager closes the file handle promptly
    with open(cover_path, 'rb') as cover_file:
        book.set_cover("cover.jpg", cover_file.read())

    chpt = epub.EpubHtml(title='subtitle', file_name='subtitle.xhtml',
                         lang=lang)
    chpt.content = '<html><head></head><body>' + subtitle + '</body></html>'
    book.add_item(chpt)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # NOTE(review): 'while' is presumably a typo for 'white'. Left unchanged
    # because correcting it would change rendering wherever this stylesheet
    # ends up being linked -- confirm the intended colour.
    style = 'BODY {color: while;}'
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    book.add_item(nav_css)

    book.spine = ['nav', chpt]

    epub_path = './epub/' + id + '.epub'
    epub.write_epub(epub_path, book, {})
def create_book_chapter(self, chapter_url):
    """Fetch one chapter, wrap it in an EPUB HTML item and register it.

    The chapter is appended to ``self.chapters`` and added to ``self.epub``;
    its images are handed to ``self.create_images``. Each stylesheet URL is
    downloaded only the first time it is seen (tracked in ``self.styles``),
    but every chapter that references a stylesheet gets a link to it.

    Args:
        chapter_url: URL passed to ``self.extractor.get_chapter_info``.

    When the extractor returns ``None`` for either the chapter info or the
    chapter content, an error line is printed and nothing is added.
    """
    chapter_info = self.extractor.get_chapter_info(chapter_url)

    # Only dereference chapter_info once it is known to exist.
    chapter_content = None
    if chapter_info is not None:
        chapter_content = self.extractor.get_chapter_content(
            chapter_info["content"])

    if chapter_info is None or chapter_content is None:
        print("create_book_chapter: errored on ", chapter_url)
        return

    title = chapter_info["title"]
    print(f"create_book_chapter: creating {title}")

    chapter = ebooklib.EpubHtml(title=title,
                                file_name=chapter_info["full_path"],
                                lang=self.lang)
    chapter.set_content(chapter_content)

    for stylesheet in chapter_info["stylesheets"]:
        css_url = stylesheet["url"]
        name = stylesheet["full_path"]
        if css_url not in self.styles:
            style = self.extractor.get_chapter_style(css_url)
            nav_css = ebooklib.EpubItem(
                uid=f"style_{name.replace('.css', '')}",
                file_name=f"{name}",
                media_type="text/css",
                content=style)
            self.styles.append(css_url)
            chapter.add_item(nav_css)
        else:
            # Already downloaded for an earlier chapter: just link it so this
            # chapter is styled too, without reusing a stale item.
            chapter.add_link(href=name, rel="stylesheet", type="text/css")
    # NOTE(review): the stylesheet items are not added to self.epub in this
    # method -- confirm that happens elsewhere.

    self.chapters.append(chapter)
    self.epub.add_item(chapter)
    self.create_images(chapter_info["images"],
                       chapter_info["asset_base_url"])
def makeImage(self, chapter_content, chapter_id):
    """Embed a chapter's images in the book and rewrite their ``src`` paths.

    Every ``<img>`` found in ``chapter_content`` is fetched with
    ``Utils().getImage``, re-encoded as JPEG and added to ``self.book`` under
    ``images/chapter_<chapter_id>/image_<n>.jpeg``. The matching ``src="..."``
    text in the serialized chapter is replaced by a centring ``style``
    attribute plus the new local path.

    Args:
        chapter_content: Parsed chapter markup offering ``findAll``.
        chapter_id: Value used in the per-chapter image directory name.

    Returns:
        The chapter serialized with ``str()``, with the ``src`` of every
        successfully embedded image rewritten. Images that fail are reported
        on stdout and keep their original ``src``.
    """
    content = str(chapter_content)

    for i, img_tag in enumerate(chapter_content.findAll('img')):
        img_url = img_tag.get('src')
        if img_url is None:
            # Nothing to download; skipping also keeps the error message
            # below from failing on a None URL.
            continue
        try:
            img = Utils().getImage(img_url)
            b = BytesIO()
            img.save(b, 'jpeg')
            b_img = b.getvalue()

            img_path = 'images/chapter_' + \
                str(chapter_id) + '/image_' + str(i) + '.jpeg'
            image_item = epub.EpubItem(file_name=img_path,
                                       media_type='image/jpeg',
                                       content=b_img)
            self.book.add_item(image_item)

            img_old_path = 'src="' + img_url
            img_new_path = 'style="display: block;margin-left: auto;margin-right: auto;" src="' + img_path
            content = content.replace(img_old_path, img_new_path)
        except Exception:
            # Best effort: one broken image must not abort the chapter, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('Error: Can not get chapter images! ' + img_url)

    return content
def add_css(self):
    """Bundle the compiled CSS files into the book and link them everywhere.

    The compiled paths are stored on ``self.stylesheets``; every HTML and
    navigation item in the book then receives a relative stylesheet link for
    each of them.
    """
    # compile assets
    default_collector.collect(self.request)

    # compile the files of the ('epub', 'css') package
    css_package = self.package_for('epub', 'css')
    compiled_paths = Packager().compile(css_package.paths)

    self.stylesheets = []
    for css_path in compiled_paths:
        source_file = staticfiles_storage.path(css_path)
        with codecs.open(source_file, 'r', 'utf-8') as handle:
            css_text = handle.read()
        css_item = epub.EpubItem(file_name=css_path,
                                 media_type="text/css",
                                 content=css_text)
        self.book.add_item(css_item)
        self.stylesheets.append(css_path)

    # make sure every html item links to the stylesheets
    linkable = (epub.EpubHtml, epub.EpubNav)
    for item in self.book.items:
        if not isinstance(item, linkable):
            continue
        for stylesheet in self.stylesheets:
            # one '..' per directory level of the item's own path
            climb = ['..'] * item.file_name.count('/')
            href = '/'.join(climb + [stylesheet])
            item.add_link(href=href, rel='stylesheet', type='text/css')
def add_css(book):
    """Register the navigation stylesheet ``style/nav.css`` on ``book``.

    Args:
        book: Book object whose ``add_item`` receives the stylesheet item.
    """
    css_rules = (
        '@namespace epub "http://www.idpf.org/2007/ops";',
        'body { font-family: Cambria, Liberation Serif, Bitstream Vera Serif,'
        ' Georgia, Times, Times New Roman, serif; }',
        'h2 { text-align: left; text-transform: uppercase; font-weight: 200; }',
        'ol { list-style-type: none; }',
        'ol > li:first-child { margin-top: 0.3em; }',
        "nav[epub|type~='toc'] > ol > li > ol { list-style-type:square; }",
        "nav[epub|type~='toc'] > ol > li > ol > li { margin-top: 0.3em; }",
    )
    # Rules are separated by single spaces, with one leading and one trailing.
    style = ' ' + ' '.join(css_rules) + ' '

    # add css file
    book.add_item(
        epub.EpubItem(uid="style_nav",
                      file_name="style/nav.css",
                      media_type="text/css",
                      content=style))
def process_output_to_epub(feed: FeedParserDict, limit: int) -> epub.EpubBook:
    """Form epub structure for epub file.

    Args:
        feed: Parsed feed; ``feed["feed"]["title"]`` becomes the book title
            and each item of ``feed["entries"]`` becomes one chapter.
        limit: Upper bound of the slice taken from the entries.

    Returns:
        The assembled (not yet written) ``epub.EpubBook``.
    """
    logging.debug("Starting format html for epub file")
    book = epub.EpubBook()
    book.set_identifier('rss news')
    book.set_title(feed.get("feed", {}).get("title"))
    book.set_language('en')
    book.spine = ['nav']
    book.toc = []

    # A feed without entries produces an empty book instead of a TypeError.
    entries = feed.get("entries") or []
    for index, news in enumerate(entries[:limit]):
        title = news.get("title")
        # Index-based names are unique and stable between runs. hash(title)
        # is salted per process and collides for repeated titles.
        chapter = epub.EpubHtml(title=title,
                                file_name='news_%d.xhtml' % index)
        # Entries without a summary contribute an empty body.
        summary = news.get("summary") or ""
        content = BeautifulSoup(
            create_title(title) + summary + create_link(news.get("link")),
            "lxml")
        images = content.find_all('img')
        process_images(images, book)
        chapter.set_content(str(content))
        book.add_item(chapter)
        book.spine.append(chapter)
        book.toc.append(chapter)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    style = 'BODY {color: white;}'
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    book.add_item(nav_css)

    logging.debug("Structure for epub file created")
    return book
def fetch_img(self, url_img):
    """Download one image and add it to the book as ``images/<filename>``.

    The download is skipped when the accumulated byte count
    (``self.image_count``) already exceeds ``self.image_size`` (when that
    limit is set) or when ``self.running`` is false.

    Args:
        url_img: URL of the image to fetch.
    """
    if self.image_size is not None and self.image_size < self.image_count:
        self.logger.warning('达到最大图像总计大小,取消图像下载')
        # At this point the image link inside the document is left broken.
        return
    if not self.running:
        self.logger.warning(f'Canceling image: {url_img}')
        return

    self.logger.info('->Fetching image: ' + url_img + '...')
    # NOTE(review): no timeout is passed, so a stalled server blocks here.
    data_img = requests.get(url_img,
                            headers={'User-Agent': Wenku8ToEpub.USER_AGENT},
                            proxies=self.get_proxy()).content
    self.image_count = self.image_count + len(data_img)

    # Everything after a known split marker is the file name; the last
    # matching marker wins.
    filename = None
    for sp in self.img_splits:
        if sp in url_img:
            filename = url_img.split(sp)[-1]
    if filename is None:
        # Fallback: the URL with its scheme stripped.
        filename = url_img.split(':')[-1].split('//')[-1]

    filetype = url_img.split('.')[-1]
    if filetype.lower() == 'jpg':
        # 'image/jpg' is not a registered media type; JPEG is 'image/jpeg'.
        filetype = 'jpeg'

    img = epub.EpubItem(file_name="images/%s" % filename,
                        media_type="image/%s" % filetype,
                        content=data_img)

    # Release the lock even if add_item raises, so other workers cannot
    # deadlock on it.
    self.lock.acquire()
    try:
        self.book.add_item(img)
    finally:
        self.lock.release()
    self.logger.info('<-Done image: ' + url_img)
def export_epub(self, book_name):
    """Finalize ``self.book`` and write it to ``output/<book_name>.epub``.

    The ``output`` folder lives next to this module and is created on demand.

    Args:
        book_name: File name (without extension) of the generated EPUB.
    """
    # add default NCX and Nav file
    self.book.add_item(epub.EpubNcx())
    self.book.add_item(epub.EpubNav())

    # define CSS style
    style = 'BODY {color: white;}'
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    # add CSS file
    self.book.add_item(nav_css)

    # basic spine
    self.book.spine = ['nav'] + self.chapterinfo

    # write to the file
    output_folder = os.path.join(os.path.dirname(__file__), "output")
    output_file = os.path.join(output_folder, book_name + '.epub')
    # Tolerates an existing folder, but real failures (e.g. permissions)
    # surface here instead of being silently swallowed.
    os.makedirs(output_folder, exist_ok=True)

    epub.write_epub(output_file, self.book, {})
    print('輸出路徑: ' + output_file)
    print('Done')
def work(project, _vars):
    """Build the EPUB for ``project`` and store a dated copy in ``history``.

    Args:
        project: Project name; used for the source folder and for the
            ``./artifacts/<project>/epub`` output paths.
        _vars: Object providing ``nid``, ``title``, ``author``,
            ``description`` and ``menu`` (mapping of section title to an
            iterable of source file names).
    """
    book = epub.EpubBook()
    book.set_identifier(_vars.nid)
    book.set_title(_vars.title)
    book.set_language('zh')
    book.add_author(_vars.author)

    book.add_item(epub.EpubNav())
    book.add_item(epub.EpubNcx())
    stylesheet = epub.EpubItem(uid="style_nav",
                               file_name="style/style.css",
                               media_type="text/css",
                               content=css)
    book.add_item(stylesheet)

    book.spine = ['nav']
    book.add_metadata('DC', 'description', _vars.description)

    # One (Section, pages) pair per menu entry, in menu order.
    toc_entries = []
    for title, files in _vars.menu.items():
        section = epub.Section(title)
        pages = []
        for file in files:
            page = build_page(book, f'./{project}/{file}',
                              file.replace(".tex", ""))
            pages.append(page)
        toc_entries.append((section, tuple(pages)))
    book.toc = tuple(toc_entries)

    latest_path = f"./artifacts/{project}/epub/{project}_latest.epub"
    epub.write_epub(latest_path, book, {'epub3_pages': False})

    # Keep a copy named after today's date (yymmdd).
    date_tag = datetime.datetime.now().strftime('%y%m%d')
    shutil.copy(
        latest_path,
        f"./artifacts/{project}/epub/history/{project}_{date_tag}.epub")

    _abspath = os.path.abspath(latest_path)
    print(f'[{now}] Epub file saved at {_abspath}.')
def _write_chapter(self, chapter, content, image_name):
    """
    Write Chapter

    Args:
        chapter (string): [Name of chapter]; also used as the file name stem
        content (string): [Content of chapter]
        image_name (iterable of string): [Paths of image files to embed;
            each path is also used as the item's file name inside the book]

    Returns:
        [epub.EpubHtml]: [Chapter of book]
    """
    # create chapter
    chap = epub.EpubHtml(title=chapter,
                         file_name=chapter + '.xhtml',
                         lang='vi')
    chap.content = content
    chap.add_item(self.book_default_css)

    # add chapter
    self.book.add_item(chap)

    # add images; each file is closed as soon as it has been read
    for iname in image_name:
        with open(iname, 'rb') as image_file:
            image_item = epub.EpubItem(file_name=iname,
                                       content=image_file.read())
        self.book.add_item(image_item)

    return chap
def __init_book(self):
    """
    [Init epub writer]

    Creates ``self.book`` with identifier and title taken from ``self.name``,
    the cover read from ``temp.png`` in the working directory, the author,
    the default paragraph stylesheet (kept on ``self.book_default_css``) and
    a ``website`` meta entry pointing at ``self.start_url``.
    """
    self.book = epub.EpubBook()
    self.book.set_identifier(self.name)
    self.book.set_title(self.name)
    self.book.set_language('vi')

    # Read the cover through a context manager so the handle is closed.
    with open('temp.png', 'rb') as cover_file:
        self.book.set_cover("temp.png", cover_file.read())

    self.book.add_author(self.author)

    # define style
    style = ' p { margin-top: 0.0em; margin-bottom: 0.3em; text-indent: 1.3em; } '
    self.book_default_css = epub.EpubItem(uid="style_default",
                                          file_name="style/default.css",
                                          media_type="text/css",
                                          content=style)
    self.book.add_item(self.book_default_css)

    self.book.add_metadata(None, 'meta', '',
                           {'name': 'website', 'content': self.start_url})
def build_epub(self):
    """Build this volume's EPUB and write it into the novel's cache folder.

    The book is kept on ``self.epub``. The output file is named after the
    book title (``"<novel title> - <volume title>.epub"``).
    """
    logger.info(f"build epub for {self}...")

    self.epub = epub.EpubBook()
    self.epub.set_title(f"{self.novel.title} - {self.title}")
    self.epub.set_identifier(uuid.uuid4().hex)
    self.epub.set_language('en')

    self.epub.add_item(epub.EpubNcx())
    self.epub.add_item(epub.EpubNav())
    # The spine references the navigation document by its id, which is the
    # lower-case 'nav'; 'Nav' would leave a dangling itemref.
    self.epub.spine = ['nav'] + self.build_chapters()

    st = 'p { margin-top: 1em; text-indent: 0em; } ' \
         'h1 {margin-top: 1em; text-align: center} ' \
         'h2 {margin: 2em 0 1em; text-align: center; font-size: 2.5em;} ' \
         'h3 {margin: 0 0 2em; font-weight: normal; text-align:center; font-size: 1.5em; font-style: italic;} ' \
         '.center { text-align: center; } ' \
         '.pagebreak { page-break-before: always; } '
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=st)
    self.epub.add_item(nav_css)

    novel_cache_path = self.novel.get_cache_path()
    epub_file_path = os.path.join(novel_cache_path,
                                  f'{self.epub.title}.epub')
    epub.write_epub(epub_file_path, self.epub, {})
    logger.info(f"Epub for volume {self} done!")
def bind_and_save_epub(self):
    """
    Finalizes binding of the ebook and saves it to the filesystem.

    The table of contents is the weather link followed by an "Articles"
    section. The stylesheet is read from ``tmpl/book_style.css`` and the file
    is written as ``news_update_<target_time>.epub``; that name is kept on
    ``self.book_filename``.
    """
    print("=== Binding and saving EPUB. ===")

    self.book.toc = (self.weather_link,
                     (epub.Section("Articles"),
                      tuple(self.article_toc_list)))

    # add navigation files
    self.book.add_item(epub.EpubNcx())
    self.book.add_item(epub.EpubNav())

    # define css style
    with open('tmpl/book_style.css', 'r') as css_file:
        style = css_file.read()

    # add css file
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    self.book.add_item(nav_css)

    # Put the navigation document first, but only once: calling this method
    # again must not stack another 'nav' entry in front of the chapters.
    if not self.chaps or self.chaps[0] != 'nav':
        self.chaps.insert(0, 'nav')
    self.book.spine = self.chaps

    self.book_filename = 'news_update_{}.epub'.format(self.target_time)
    epub.write_epub(self.book_filename, self.book, {})
    print("Saved as {}.".format(self.book_filename))
def filter_chapter(self, html, new_css_item): '''每一章都是一个html ''' #给每一个html文件添加css rel_css_dir = os.path.relpath('.', os.path.dirname(html.get_name())) rel_css_file_name=os.path.join(rel_css_dir, self.new_css_filename) html.add_item(epub.EpubItem(file_name=rel_css_file_name, media_type='text/css')) rel_image_dir=os.path.relpath(self.font_image_dir, os.path.dirname(html.get_name())) #生成html对应的links self.init_links_of_html(html) #只处理body里面正文处理 html_tree = parse_html_string(html.get_body_content()) root=html_tree.getroottree() build_text_list = etree.XPath("//text()") text_list=build_text_list(root) for text in text_list: #找出一段文字中生僻字的位置 pos_list = self.find_uncommon_words_in_one_text(text) self.add_image_tag_for_uncommon_words_in_one_text(text, pos_list,rel_image_dir) #将root更新到html的content中,不然的话,不会保存 ori_root=parse_html_string(html.content) #删除旧的body body=ori_root.find('body') ori_root.remove(body) #新的body ori_root.append(root.find('body')) html.content = etree.tostring(ori_root, pretty_print=True, encoding='utf-8', xml_declaration=True)
def create_book(self, feed):
    """
    add news on epub format

    :param feed: feed news; mapping whose "Title", "Date", "Link" and "img"
        entries are parallel sequences (one element per news item)
    :return: book of news
    """
    logging.debug("Starting format html for epub file")
    self.book.spine = ['nav']
    self.book.toc = []

    for title, date, link, img in zip(feed["Title"], feed["Date"],
                                      feed['Link'], feed["img"]):
        # NOTE(review): the title is used verbatim as file name, so titles
        # containing '/' or repeated titles produce odd or clashing paths.
        chapter = epub.EpubHtml(title=title, file_name=title + '.xhtml')
        # The space after the src value keeps the two attributes separate.
        content = f"<h4>{title}</h4><br>Date:{date}<br><a href='{link}'>Link<br><img src='{img}' " \
                  f"alt='NoPhoto' ></a>"
        chapter.set_content(content)
        self.book.add_item(chapter)
        # Chapters must be part of the spine to be in the reading order.
        self.book.spine.append(chapter)
        self.book.toc.append(chapter)

    self.book.add_item(epub.EpubNcx())
    self.book.add_item(epub.EpubNav())

    style = 'BODY {color: white;}'
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    self.book.add_item(nav_css)

    logging.debug("Structure for epub file created")
    return self.book
def write_epub():
    """Collect every article and write them as chapters of ``Manson.epub``.

    Each element of ``get_all_article_content()`` supplies the chapter title
    at index 0 and the chapter content at index 1.
    """
    articles = get_all_article_content()

    book = epub.EpubBook()
    book.set_identifier('id310626')
    book.set_language('en')
    book.add_author('Mark Manson')

    chapters = []
    for position, article in enumerate(articles):
        heading, body = article[0], article[1]
        page = epub.EpubHtml(title=heading, file_name=str(position),
                             lang='en')
        page.content = body
        chapters.append(page)
        book.add_item(page)

    book.spine = ['nav'] + chapters
    book.toc = tuple(chapters)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    book.add_item(
        epub.EpubItem(uid='style_nav',
                      file_name='style/nav.css',
                      media_type='text/css',
                      content='BODY {color:white;}'))

    epub.write_epub('Manson.epub', book, {})
def union(self, name, size):
    """Merge chapters 1..``size`` into one EPUB named after ``name``.

    Args:
        name: Raw book name; it is passed through ``self._get_valid_name``
            and the result is used as identifier, title and file name.
        size: Number of chapters; chapter contents are read for the indices
            1 through ``size`` inclusive.
    """
    file_name = self._get_valid_name(name)

    book = epub.EpubBook()
    book.set_identifier(file_name)
    book.set_title(file_name)
    book.set_language('zh')

    chapters = []
    for number in range(1, size + 1):
        # The first line of the raw text is the chapter title.
        lines = self.__read_content(number).split('\n')
        heading = lines[0]
        page = epub.EpubHtml(title=heading,
                             file_name='ch_{}.xhtml'.format(number),
                             lang='zh')
        page.content = '<h4>{}</h4>{}'.format(
            heading, self.__create_content(lines[1:]))
        book.add_item(page)
        chapters.append(page)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    book.add_item(
        epub.EpubItem(uid="style_nav",
                      file_name="style/nav.css",
                      media_type="text/css",
                      content='BODY {color: white;}'))

    book.toc = list(chapters)
    book.spine = ['nav'] + chapters

    epub.write_epub(self._base_path + file_name + '.epub', book, {})
def contents(self):
    """Yield the EPUB items built from every folder in ``self._workpath``.

    Three passes, in this order:
      1. ``*.html`` pages, with stylesheet links redirected to
         ``../Styles/<folder index>/`` and previous/next snippets inserted,
         emitted as ``Text/pdf_frame<index>.html``;
      2. ``chapter*.xhtml`` files, copied unchanged to ``Text/``;
      3. ``*.woff`` fonts, copied to ``Styles/<folder index>/``.

    Yields nothing when ``self._workpath`` is empty.
    """
    if not self._workpath:
        return
    self._toc = []

    def frame_file(index):
        # Folder 0 has no numeric suffix: "pdf_frame.html".
        suffix = str(index) if index else ''
        return "pdf_frame%s.html" % suffix

    last_index = len(self._workpath) - 1
    for index, folder in enumerate(self._workpath):
        for html_path in glob(os.path.join(folder, '*.html')):
            with open(html_path, 'r') as source:
                markup = source.read()

            link_marker = 'link rel="stylesheet" href="'
            markup = markup.replace(
                link_marker, link_marker + '../Styles/%d/' % index)

            if index:
                # Every page but the first links back to its predecessor.
                container = '<div id="page-container">'
                previous = PREV_TEMPLATE % frame_file(index - 1)
                markup = markup.replace(container, container + previous)

            if index < last_index:
                # Every page but the last links forward to its successor.
                tail = '</div>\n<div class="loading-indicator">'
                following = NEXT_TEMPLATE % frame_file(index + 1)
                markup = markup.replace(tail, following + tail)

            yield epub.EpubItem(file_name="Text/%s" % frame_file(index),
                                content=markup)

    for folder in self._workpath:
        for chapter_path in glob(os.path.join(folder, 'chapter*.xhtml')):
            base = os.path.basename(chapter_path)
            with open(chapter_path, 'rb') as source:
                yield epub.EpubItem(file_name="Text/%s" % base,
                                    content=source.read())

    for index, folder in enumerate(self._workpath):
        prefix = 'Styles/' + str(index)
        for font_path in glob(os.path.join(folder, '*.woff')):
            base = os.path.basename(font_path)
            with open(font_path, 'rb') as source:
                yield epub.EpubItem(file_name="%s/%s" % (prefix, base),
                                    content=source.read())
def read_comments_css():
    """Return the ``styles.css`` resource wrapped as ``style/comments.css``."""
    return epub.EpubItem(
        uid="style_nav",
        file_name="style/comments.css",
        media_type="text/css",
        content=load_resource_text("styles.css"),
    )
def images(self):
    """Yield one EPUB item per ``*.jpg`` file found in ``self._workpath``.

    Each item is stored as ``Text/<file name>`` and uses the file name as its
    uid.
    """
    for filename in glob(os.path.join(self._workpath, '*.jpg')):
        name = os.path.basename(filename)
        with open(filename, 'rb') as f:
            # The registered media type for JPEG files is "image/jpeg".
            yield epub.EpubItem(uid=name,
                                file_name="Text/%s" % name,
                                media_type="image/jpeg",
                                content=f.read())
def post_process(self):
    """Add the NCX, the navigation document and ``style/nav.css`` to the book."""
    stylesheet = epub.EpubItem(uid="style_nav",
                               file_name="style/nav.css",
                               media_type="text/css",
                               content=css)
    self.book.add_item(epub.EpubNcx())
    self.book.add_item(epub.EpubNav())
    self.book.add_item(stylesheet)
def add_body_style(self, css):
    """Register ``css`` as ``style/body.css`` and keep it on ``self.body_css``.

    Args:
        css: Stylesheet content for the new item.
    """
    body_item = epub.EpubItem(uid="style_body",
                              file_name="style/body.css",
                              media_type="text/css",
                              content=css)
    self.body_css = body_item
    # Make the stylesheet part of the book package.
    self.book.add_item(body_item)
def stylesheets(self):
    """Yield one EPUB item per ``*.css`` file found in ``self._workpath``.

    Files are read as bytes and stored under ``Styles/<file name>``, with the
    file name doubling as uid.
    """
    pattern = os.path.join(self._workpath, '*.css')
    for css_path in glob(pattern):
        base = os.path.basename(css_path)
        with open(css_path, "rb") as source:
            css_item = epub.EpubItem(uid=base,
                                     file_name="Styles/%s" % base,
                                     media_type="text/css",
                                     content=source.read())
            yield css_item
def to_epub(self, style=None):
    """Generate `EpubBook` from result of `get_articles`

    Args:
        style: Path of a CSS file, or the name of a preset style (with or
            without the ``.css`` suffix) looked up in ``STYLES_PATH``.
            Defaults to ``STYLE`` when empty.

    Returns:
        The assembled ``epub.EpubBook`` (not written to disk).

    Raises:
        Exception: ``style`` is neither an existing file nor a preset.
    """
    if not style:
        style = STYLE
    logging.info('Creating book using %s style', style)

    articles = self.get_articles()
    book = epub.EpubBook()
    book.set_title(self.__class__.name)

    # Create HTML file for each article
    chapters = []
    for i, article in enumerate(articles):
        chapter = epub.EpubHtml(uid=str(i), title=article.title,
                                file_name=('%d.xhtml' % i))
        chapter.content = '<html><head>' + \
            '<link rel="stylesheet" href="style/default.css" />' + \
            '</head><body>' + \
            ('<h1>%s</h1>' % article.title) + \
            article.content + '</body></html>'
        chapters.append(chapter)
        book.add_item(chapter)

    # Add generic book metadata.
    # The TOC must be a real list: a lazy map() object is exhausted after
    # its first traversal, leaving later consumers with an empty TOC.
    book.toc = [
        epub.Link(c.get_name(), c.title, str(c.get_id())) for c in chapters
    ]
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Add stylesheet
    if os.path.exists(style):
        # Use path if file exists
        style_path = style
        logging.debug('Using custom style %s', style)
    else:
        # Otherwise use preset style
        style_name = style if style.endswith('.css') else style + '.css'
        style_path = os.path.join(STYLES_PATH, style_name)
        if not os.path.exists(style_path):
            raise Exception('%s is not a preset style' % style)
        logging.debug('Using preset style %s', style)

    with open(style_path) as f:
        nav_css = epub.EpubItem(uid="style_nav",
                                file_name="style/default.css",
                                media_type="text/css",
                                content=f.read())
    book.add_item(nav_css)

    book.spine = ['nav'] + chapters
    return book
def __init__(self, config, callbacks):
    """Store the configuration and preload the book stylesheet.

    Args:
        config: Configuration object; ``config.book`` supplies ``title``,
            ``author`` and ``css_filename``.
        callbacks: Stored unchanged on ``self.callbacks``.
    """
    self.config = config
    self.callbacks = callbacks
    self.chapters = []

    book_config = config.book
    self.title = book_config.title
    self.author = book_config.author

    # The stylesheet keeps its configured name below "style/" in the book.
    with open(config.book.css_filename, 'r') as stylesheet:
        self.css = epub.EpubItem(
            uid='default',
            file_name="style/" + config.book.css_filename,
            media_type="text/css",
            content=stylesheet.read())
def define_css(book_id, new_book_id):
    """Create the ``style/nav.css`` item; both ids are only logged.

    Args:
        book_id: Logged identifier of the book.
        new_book_id: Logged identifier of the new book.

    Returns:
        The ``epub.EpubItem`` holding the stylesheet.
    """
    logger.info('Defining css for book book_id: %s and new_book_id:%s',
                book_id, new_book_id)
    return epub.EpubItem(uid="style_nav",
                         file_name="style/nav.css",
                         media_type="text/css",
                         content='BODY {color: white;}')
async def download_img(self, url, sem):
    """Download one image and add it to the book as ``images/<basename>``.

    Args:
        url: Image URL; its basename becomes the file name inside the book.
        sem: Async context manager (used with ``async with``) that bounds how
            many downloads run at the same time.
    """
    import asyncio  # local import keeps this block self-contained

    async with sem:
        filename = os.path.basename(url)

        # requests is blocking; run it on the default executor so other
        # coroutines keep making progress while the download is in flight.
        loop = asyncio.get_event_loop()
        response = await loop.run_in_executor(None, requests.get, url)
        data = response.content

        file_type = filename.split('.')[-1]
        if file_type.lower() == 'jpg':
            # 'image/jpg' is not a registered media type; JPEG is
            # 'image/jpeg'.
            file_type = 'jpeg'

        item_img = epub.EpubItem(file_name="images/%s" % filename,
                                 media_type="image/%s" % file_type,
                                 content=data)
        self.book.add_item(item_img)
        self.logger.info('<-Done image: ' + url)
def _add_css(self):
    """Add the default ``style/nav.css`` stylesheet item to ``self.book``."""
    self.book.add_item(
        epub.EpubItem(uid="style_nav",
                      file_name="style/nav.css",
                      media_type="text/css",
                      content='BODY {color: white;}'))