def html_content_for(book, src_dir):
    html_fpath = src_dir.joinpath(fname_for(book, "html"))

    # is HTML file present?
    if not html_fpath.exists():
        logger.warn("Missing HTML content for #{} at {}".format(book.id, html_fpath))
        return None, None

    try:
        return read_file(html_fpath)
    except UnicodeDecodeError:
        logger.error("Unable to read HTML content: {}".format(html_fpath))
        raise
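
# `read_file` is not defined in this section. A hypothetical stand-in, assuming
# (as the caller above does) that it returns a (content, encoding) tuple and
# lets UnicodeDecodeError propagate; the project's real helper may differ.
def read_file_sketch(fpath, encoding="utf-8"):
    from pathlib import Path

    content = Path(fpath).read_text(encoding=encoding)  # may raise UnicodeDecodeError
    return content, encoding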
def html_content_for(book, static_folder, download_cache):
    html_fpath = os.path.join(download_cache, fname_for(book, 'html'))

    # is HTML file present?
    if not path(html_fpath).exists():
        logger.warn("Missing HTML content for #{} at {}"
                    .format(book.id, html_fpath))
        return None, None

    try:
        return read_file(html_fpath)
    except UnicodeDecodeError:
        logger.error("Unable to read HTML content: {}".format(html_fpath))
        raise
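
# `fname_for` is also defined elsewhere. A purely hypothetical illustration of
# the naming convention the callers above appear to assume ("{id}.{format}",
# keyed on book.id); the real helper may encode more than this.
def fname_for_sketch(book, book_format):
    return "{id}.{ext}".format(id=book.id, ext=book_format)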
def handle_unoptimized_files(
    book,
    static_folder,
    src_dir,
    languages,
    formats,
    books,
    project_id,
    optimizer_version,
    force=False,
    title_search=False,
    add_bookshelves=False,
    s3_storage=None,
):
    def copy_file(src, dst):
        logger.info("\t\tCopying {}".format(dst))
        try:
            shutil.copy2(src, dst)
        except IOError:
            logger.error("/!\\ Unable to copy missing file {}".format(src))
            return

    def update_download_cache(unoptimized_file, optimized_file):
        book_dir = unoptimized_file.parents[1]
        optimized_dir = book_dir.joinpath("optimized")
        unoptimized_dir = book_dir.joinpath("unoptimized")
        if not optimized_dir.exists():
            optimized_dir.mkdir()
        dst = optimized_dir.joinpath(optimized_file.name)
        os.unlink(unoptimized_file)
        copy_file(optimized_file.resolve(), dst.resolve())
        if not [fpath for fpath in unoptimized_dir.iterdir()]:
            unoptimized_dir.rmdir()

    logger.info("\tExporting Book #{id}.".format(id=book.id))

    # actual book content, as HTML
    html, _ = html_content_for(book=book, src_dir=src_dir)
    html_book_optimized_files = []
    if html:
        article_fpath = static_folder.joinpath(article_name_for(book))
        if not article_fpath.exists() or force:
            logger.info("\t\tExporting to {}".format(article_fpath))
            new_html = update_html_for_static(book=book, html_content=html)
            save_bs_output(new_html, article_fpath, UTF8)
            html_book_optimized_files.append(article_fpath)
            update_download_cache(
                src_dir.joinpath(fname_for(book, "html")), article_fpath
            )
            if not src_dir.exists():
                return
        else:
            logger.info("\t\tSkipping HTML article {}".format(article_fpath))

    def optimize_image(src, dst, force=False):
        if dst.exists() and not force:
            logger.info("\tSkipping image optimization for {}".format(dst))
            return dst
        logger.info("\tOptimizing image {}".format(dst))
        if src.suffix == ".png":
            return optimize_png(str(src.resolve()), str(dst.resolve()))
        if src.suffix in (".jpg", ".jpeg"):
            return optimize_jpeg(str(src.resolve()), str(dst.resolve()))
        if src.suffix == ".gif":
            return optimize_gif(str(src.resolve()), str(dst.resolve()))
        return dst

    def optimize_gif(src, dst):
        exec_cmd(["gifsicle", "-O3", src, "-o", dst])

    def optimize_png(src, dst):
        exec_cmd(["pngquant", "--nofs", "--force", "--output", dst, src])
        exec_cmd(["advdef", "-z", "-4", "-i", "5", dst])

    def optimize_jpeg(src, dst):
        if src != dst:
            copy_file(src, dst)
        exec_cmd(["jpegoptim", "--strip-all", "-m50", dst])

    def optimize_epub(src, dst):
        logger.info("\t\tCreating ePUB off {} at {}".format(src, dst))
        zipped_files = []
        # create temp directory to extract to
        tmpd = tempfile.mkdtemp(dir=TMP_FOLDER)
        try:
            with zipfile.ZipFile(src, "r") as zf:
                zipped_files = zf.namelist()
                zf.extractall(tmpd)
        except zipfile.BadZipFile as exc:
            shutil.rmtree(tmpd)
            raise exc

        remove_cover = False
        # iterate over a copy: entries may be removed from zipped_files below
        for fname in zipped_files[:]:
            fnp = os.path.join(tmpd, fname)
            if path(fname).ext in (".png", ".jpeg", ".jpg", ".gif"):
                # special case to remove ugly cover
                if fname.endswith("cover.jpg") and is_bad_cover(fnp):
                    zipped_files.remove(fname)
                    remove_cover = True
                else:
                    optimize_image(pathlib.Path(fnp), pathlib.Path(fnp), force=True)

            if path(fname).ext in (".htm", ".html"):
                html_content, _ = read_file(fnp)
                html = update_html_for_static(
                    book=book, html_content=html_content, epub=True
                )
                save_bs_output(html, fnp, UTF8)

            if path(fname).ext == ".ncx":
                pattern = "*** START: FULL LICENSE ***"
                ncx, _ = read_file(fnp)
                soup = BeautifulSoup(ncx, "lxml-xml")
                for tag in soup.findAll("text"):
                    if pattern in tag.text:
                        s = tag.parent.parent
                        # remove the license entry and everything after it;
                        # collect siblings before detaching so iteration is safe
                        for sibling in list(s.next_siblings):
                            sibling.extract()
                        s.decompose()
                save_bs_output(soup, fnp, UTF8)

        # delete {id}/cover.jpg if exist and update {id}/content.opf
        if remove_cover:
            # remove cover
            path(os.path.join(tmpd, text_type(book.id), "cover.jpg")).unlink_p()
            soup = None
            opff = os.path.join(tmpd, text_type(book.id), "content.opf")
            if os.path.exists(opff):
                opff_content, _ = read_file(opff)
                soup = BeautifulSoup(opff_content, "lxml-xml")
                for elem in soup.findAll():
                    if getattr(elem, "attrs", {}).get("href") == "cover.jpg":
                        elem.decompose()
                save_bs_output(soup, opff, UTF8)

        # bundle epub as zip
        zip_epub(epub_fpath=dst, root_folder=tmpd, fpaths=zipped_files)
        path(tmpd).rmtree_p()

    def handle_companion_file(
        fname,
        dstfname=None,
        book=None,
        force=False,
        as_ext=None,
        html_file_list=None,
        s3_storage=None,
    ):
        ext = fname.suffix if as_ext is None else as_ext
        src = fname
        if dstfname is None:
            dstfname = fname.name
        dst = static_folder.joinpath(dstfname)
        if dst.exists() and not force:
            logger.debug("\t\tSkipping existing companion {}".format(dstfname))
            return

        # optimization based on mime/extension
        if ext in (".png", ".jpg", ".jpeg", ".gif"):
            logger.info("\t\tCopying and optimizing image companion {}".format(fname))
            optimize_image(src, dst)
            if dst.name == f"{book.id}_cover_image.jpg" and s3_storage:
                upload_to_cache(
                    asset=dst,
                    book_format="cover",
                    book_id=book.id,
                    etag=book.cover_etag,
                    s3_storage=s3_storage,
                    optimizer_version=optimizer_version,
                )
                update_download_cache(src, dst)
            elif html_file_list:
                html_file_list.append(dst)
                update_download_cache(src, dst)
        elif ext == ".epub":
            logger.info("\t\tCreating optimized EPUB file {}".format(fname))
            tmp_epub = tempfile.NamedTemporaryFile(suffix=".epub", dir=TMP_FOLDER)
            tmp_epub.close()
            try:
                optimize_epub(src, tmp_epub.name)
            except zipfile.BadZipFile:
                logger.warn(
                    "\t\tBad zip file. "
                    "Copying as it might be working: {}".format(fname)
                )
                handle_companion_file(fname, dstfname, book, force, as_ext=".zip")
            else:
                path(tmp_epub.name).move(dst)
                if s3_storage:
                    upload_to_cache(
                        asset=dst,
                        book_format="epub",
                        book_id=book.id,
                        etag=book.epub_etag,
                        s3_storage=s3_storage,
                        optimizer_version=optimizer_version,
                    )
                update_download_cache(src, dst)
        else:
            # excludes files created by Windows Explorer
            if src.name.endswith("_Thumbs.db"):
                return
            # copy otherwise (PDF mostly)
            logger.info("\t\tCopying companion file to {}".format(dst))
            copy_file(src, dst)
            if ext != ".pdf" and ext != ".zip" and html_file_list:
                html_file_list.append(dst)
                update_download_cache(src, dst)

    # associated files (images, etc)
    for fpath in src_dir.iterdir():
        if fpath.is_file() and fpath.name.startswith(f"{book.id}_"):
            if fpath.suffix in (".html", ".htm"):
                src = fpath
                dst = static_folder.joinpath(fpath.name)
                if dst.exists() and not force:
                    logger.debug("\t\tSkipping existing HTML {}".format(dst))
                    continue

                logger.info("\t\tExporting HTML file to {}".format(dst))
                html, _ = read_file(src)
                new_html = update_html_for_static(book=book, html_content=html)
                save_bs_output(new_html, dst, UTF8)
                html_book_optimized_files.append(dst)
                update_download_cache(src, dst)
            else:
                try:
                    handle_companion_file(
                        fpath,
                        force=force,
                        html_file_list=html_book_optimized_files,
                        s3_storage=s3_storage,
                        book=book,
                    )
                except Exception as e:
                    logger.exception(e)
                    logger.error(
                        "\t\tException while handling companion file: {}".format(e)
                    )

    if s3_storage and html_book_optimized_files:
        upload_to_cache(
            asset=html_book_optimized_files,
            book_format="html",
            etag=book.html_etag,
            book_id=book.id,
            s3_storage=s3_storage,
            optimizer_version=optimizer_version,
        )

    # other formats
    for format in formats:
        if format not in book.formats() or format == "html":
            continue
        book_file = src_dir.joinpath(fname_for(book, format))
        if book_file.exists():
            try:
                handle_companion_file(
                    book_file,
                    archive_name_for(book, format),
                    force=force,
                    book=book,
                    s3_storage=s3_storage,
                )
            except Exception as e:
                logger.exception(e)
                logger.error(
                    "\t\tException while handling companion file: {}".format(e)
                )
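
# `zip_epub` is called above but defined elsewhere. A minimal sketch of a
# compatible bundler, assuming the keyword signature used by the caller; per
# the EPUB OCF spec the mimetype entry should come first, stored uncompressed.
# The project's real implementation may differ.
def zip_epub_sketch(epub_fpath, root_folder, fpaths):
    import os
    import zipfile

    with zipfile.ZipFile(epub_fpath, "w", zipfile.ZIP_DEFLATED) as zf:
        if "mimetype" in fpaths:
            zf.write(
                os.path.join(root_folder, "mimetype"),
                "mimetype",
                compress_type=zipfile.ZIP_STORED,  # mimetype must not be deflated
            )
        for fpath in fpaths:
            if fpath == "mimetype":
                continue
            zf.write(os.path.join(root_folder, fpath), fpath)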
def export_book_to(book, static_folder, download_cache, cached_files,
                   languages, formats, books, project_id, force=False):
    logger.info("\tExporting Book #{id}.".format(id=book.id))

    # actual book content, as HTML
    html, encoding = html_content_for(book=book,
                                      static_folder=static_folder,
                                      download_cache=download_cache)
    if html:
        article_fpath = os.path.join(static_folder, article_name_for(book))
        if not path(article_fpath).exists() or force:
            logger.info("\t\tExporting to {}".format(article_fpath))
            new_html = update_html_for_static(book=book, html_content=html)
            save_bs_output(new_html, article_fpath, UTF8)
        else:
            logger.info("\t\tSkipping HTML article {}".format(article_fpath))

    def symlink_from_cache(fname, dstfname=None):
        src = os.path.join(path(download_cache).abspath(), fname)
        if dstfname is None:
            dstfname = fname
        dst = os.path.join(path(static_folder).abspath(), dstfname)
        logger.info("\t\tSymlinking {}".format(dst))
        path(dst).unlink_p()
        try:
            path(src).link(dst)  # hard link
        except IOError:
            logger.error("/!\\ Unable to symlink missing file {}".format(src))
            return

    def copy_from_cache(fname, dstfname=None):
        src = os.path.join(path(download_cache).abspath(), fname)
        if dstfname is None:
            dstfname = fname
        dst = os.path.join(path(static_folder).abspath(), dstfname)
        logger.info("\t\tCopying {}".format(dst))
        path(dst).unlink_p()
        try:
            path(src).copy(dst)
        except IOError:
            logger.error("/!\\ Unable to copy missing file {}".format(src))
            return

    def optimize_image(src, dst, force=False):
        if path(dst).exists() and not force:
            logger.info("\tSkipping image optimization for {}".format(dst))
            return dst
        logger.info("\tOptimizing image {}".format(dst))
        if path(src).ext == '.png':
            return optimize_png(src, dst)
        if path(src).ext in ('.jpg', '.jpeg'):
            return optimize_jpeg(src, dst)
        if path(src).ext == '.gif':
            return optimize_gif(src, dst)
        return dst

    def optimize_gif(src, dst):
        exec_cmd(['gifsicle', '-O3', src, '-o', dst])

    def optimize_png(src, dst):
        exec_cmd(['pngquant', '--nofs', '--force', '--output', dst, src])
        exec_cmd(['advdef', '-z', '-4', '-i', '5', dst])

    def optimize_jpeg(src, dst):
        copy_from_cache(src, dst)
        exec_cmd(['jpegoptim', '--strip-all', '-m50', dst])

    def optimize_epub(src, dst):
        logger.info("\t\tCreating ePUB off {} at {}".format(src, dst))
        zipped_files = []
        # create temp directory to extract to
        tmpd = tempfile.mkdtemp(dir=TMP_FOLDER)
        with zipfile.ZipFile(src, 'r') as zf:
            zipped_files = zf.namelist()
            zf.extractall(tmpd)

        remove_cover = False
        # iterate over a copy: entries may be removed from zipped_files below
        for fname in zipped_files[:]:
            fnp = os.path.join(tmpd, fname)
            if path(fname).ext in ('.png', '.jpeg', '.jpg', '.gif'):
                # special case to remove ugly cover
                if fname.endswith('cover.jpg') and is_bad_cover(fnp):
                    zipped_files.remove(fname)
                    remove_cover = True
                else:
                    optimize_image(fnp, fnp)

            if path(fname).ext in ('.htm', '.html'):
                html_content, html_encoding = read_file(fnp)
                html = update_html_for_static(book=book,
                                              html_content=html_content,
                                              epub=True)
                save_bs_output(html, fnp, UTF8)

            if path(fname).ext == '.ncx':
                pattern = "*** START: FULL LICENSE ***"
                ncx, ncx_encoding = read_file(fnp)
                soup = BeautifulSoup(ncx, 'lxml-xml')
                for tag in soup.findAll('text'):
                    if pattern in tag.text:
                        s = tag.parent.parent
                        # remove the license entry and everything after it;
                        # collect siblings before detaching so iteration is safe
                        for sibling in list(s.next_siblings):
                            sibling.extract()
                        s.decompose()
                save_bs_output(soup, fnp, UTF8)

        # delete {id}/cover.jpg if exist and update {id}/content.opf
        if remove_cover:
            # remove cover
            path(
                os.path.join(tmpd, text_type(book.id), 'cover.jpg')).unlink_p()
            soup = None
            opff = os.path.join(tmpd, text_type(book.id), 'content.opf')
            if os.path.exists(opff):
                opff_content, opff_encoding = read_file(opff)
                soup = BeautifulSoup(opff_content, 'lxml-xml')
                for elem in soup.findAll():
                    if getattr(elem, 'attrs', {}).get('href') == 'cover.jpg':
                        elem.decompose()
                save_bs_output(soup, opff, UTF8)

        # bundle epub as zip
        zip_epub(epub_fpath=dst, root_folder=tmpd, fpaths=zipped_files)
        path(tmpd).rmtree_p()

    def handle_companion_file(fname, dstfname=None, book=None,
                              force=False, as_ext=None):
        ext = path(fname).ext if as_ext is None else as_ext
        src = os.path.join(path(download_cache).abspath(), fname)
        if dstfname is None:
            dstfname = fname
        dst = os.path.join(path(static_folder).abspath(), dstfname)
        if path(dst).exists() and not force:
            logger.debug("\t\tSkipping existing companion {}".format(dstfname))
            return

        # optimization based on mime/extension
        if ext in ('.png', '.jpg', '.jpeg', '.gif'):
            logger.info("\t\tCopying and optimizing image companion {}"
                        .format(fname))
            optimize_image(src, dst)
        elif ext == '.epub':
            logger.info("\t\tCreating optimized EPUB file {}".format(fname))
            tmp_epub = tempfile.NamedTemporaryFile(suffix='.epub',
                                                   dir=TMP_FOLDER)
            tmp_epub.close()
            try:
                optimize_epub(src, tmp_epub.name)
            except zipfile.BadZipFile:
                logger.warn("\t\tBad zip file. "
                            "Copying as it might be working: {}".format(fname))
                handle_companion_file(fname, dstfname, book, force,
                                      as_ext='.zip')
            else:
                path(tmp_epub.name).move(dst)
        else:
            # excludes files created by Windows Explorer
            if src.endswith('_Thumbs.db'):
                return
            # copy otherwise (PDF mostly)
            logger.debug("\t\tUnhandled extension: {}".format(dst))
            logger.info("\t\tCopying companion file to {}".format(dst))
            copy_from_cache(src, dst)

    # associated files (images, etc)
    for fname in [fn for fn in cached_files
                  if fn.startswith("{}_".format(book.id))]:
        if path(fname).ext in ('.html', '.htm'):
            src = os.path.join(path(download_cache).abspath(), fname)
            dst = os.path.join(path(static_folder).abspath(), fname)
            if path(dst).exists() and not force:
                logger.debug("\t\tSkipping existing HTML {}".format(dst))
                continue

            logger.info("\t\tExporting HTML file to {}".format(dst))
            html, encoding = read_file(src)
            new_html = update_html_for_static(book=book, html_content=html)
            save_bs_output(new_html, dst, UTF8)
        else:
            try:
                handle_companion_file(fname, force=force)
            except Exception as e:
                logger.exception(e)
                logger.error("\t\tException while handling companion file: {}"
                             .format(e))

    # other formats
    for format in formats:
        if format not in book.formats() or format == 'html':
            continue
        try:
            handle_companion_file(fname_for(book, format),
                                  archive_name_for(book, format),
                                  force=force)
        except Exception as e:
            logger.exception(e)
            logger.error("\t\tException while handling companion file: {}"
                         .format(e))

    # book presentation article
    cover_fpath = os.path.join(static_folder,
                               article_name_for(book=book, cover=True))
    if not path(cover_fpath).exists() or force:
        logger.info("\t\tExporting to {}".format(cover_fpath))
        html = cover_html_content_for(book=book,
                                      static_folder=static_folder,
                                      books=books,
                                      project_id=project_id)
        with open(cover_fpath, 'w') as f:
            if six.PY2:
                f.write(html.encode(UTF8))
            else:
                f.write(html)
    else:
        logger.info("\t\tSkipping cover {}".format(cover_fpath))
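
# `exec_cmd` is used by the image optimizers above but defined elsewhere. A
# hypothetical stand-in: it is assumed to run an external tool (gifsicle,
# pngquant, advdef, jpegoptim must be installed) and fail loudly on error.
def exec_cmd_sketch(args):
    import subprocess

    # str() guards against path-like arguments; check=True raises on failure
    subprocess.run([str(arg) for arg in args], check=True)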