def handle_companion_file(fname, dstfname=None, book=None):
    """Copy one companion file from the download cache into the static folder.

    The file is optimized on the way when the format allows it: raster
    images are compressed in place, ePUBs are rebuilt through a temporary
    file, everything else (mostly PDF) is copied as-is.

    fname    -- file name relative to the download cache
    dstfname -- destination name; defaults to fname
    book     -- book record (accepted for interface compatibility)
    """
    cache_path = os.path.join(path(download_cache).abspath(), fname)
    if dstfname is None:
        dstfname = fname
    target_path = os.path.join(path(static_folder).abspath(), dstfname)

    # optimization based on mime/extension
    ext = path(fname).ext
    if ext in ('.png', '.jpg', '.jpeg', '.gif'):
        copy_from_cache(cache_path, target_path)
        optimize_image(path_for_cmd(target_path))
    elif ext == '.epub':
        # create-and-close only to reserve a unique temp name; optimize_epub
        # writes the actual file, which is then moved into place
        tmp_epub = tempfile.NamedTemporaryFile(suffix='.epub', dir=TMP_FOLDER)
        tmp_epub.close()
        optimize_epub(cache_path, tmp_epub.name)
        path(tmp_epub.name).move(target_path)
    else:
        # excludes files created by Windows Explorer
        if cache_path.endswith('_Thumbs.db'):
            return
        # copy otherwise (PDF mostly)
        logger.debug("\t\tshitty ext: {}".format(target_path))
        copy_from_cache(cache_path, target_path)
def optimize_epub(src, dst):
    """Rebuild the ePUB at `src` into an optimized ePUB at `dst`.

    Extracts the archive to a temp directory, optimizes the images it
    contains (dropping an auto-generated "bad" cover when detected),
    rewrites HTML for static hosting, strips the license block from the
    .ncx table of contents, then re-zips everything.

    NOTE(review): this relies on a module-level `book` for the HTML rewrite
    and for the {id}/cover.jpg and {id}/content.opf paths — confirm it is
    in scope wherever this is called.
    """
    logger.info("\t\tCreating ePUB at {}".format(dst))

    # create temp directory to extract to
    tmpd = tempfile.mkdtemp(dir=TMP_FOLDER)
    with zipfile.ZipFile(src, 'r') as zf:
        zipped_files = zf.namelist()
        zf.extractall(tmpd)

    remove_cover = False
    # iterate over a snapshot: the bad-cover branch removes entries from
    # zipped_files, and mutating a list while iterating it skips elements
    for fname in list(zipped_files):
        fnp = os.path.join(tmpd, fname)
        ext = path(fname).ext

        if ext in ('.png', '.jpeg', '.jpg', '.gif'):
            # special case to remove ugly cover
            if fname.endswith('cover.jpg') and is_bad_cover(fnp):
                zipped_files.remove(fname)
                remove_cover = True
            else:
                optimize_image(path_for_cmd(fnp))

        if ext in ('.htm', '.html'):
            with open(fnp, 'r') as f:
                html = update_html_for_static(book=book,
                                              html_content=f.read(),
                                              epub=True)
            with open(fnp, 'w') as f:
                f.write(html)

        if ext == '.ncx':
            pattern = "*** START: FULL LICENSE ***"
            with open(fnp, 'r') as f:
                soup = BeautifulSoup(f.read(), ["lxml", "xml"])
            for tag in soup.findAll('text'):
                if pattern in tag.text:
                    license_node = tag.parent.parent
                    # materialize siblings BEFORE decomposing anything:
                    # decomposing while iterating the generator (and after
                    # the anchor node itself is gone) skips/breaks nodes
                    trailing = list(license_node.next_siblings)
                    license_node.decompose()
                    for node in trailing:
                        node.decompose()
            with open(fnp, 'w') as f:
                # str(soup), not soup.encode(): encode() returns bytes,
                # which cannot be written to a text-mode file
                f.write(str(soup))

    # delete {id}/cover.jpg if exist and update {id}/content.opf
    if remove_cover:
        # remove cover
        path(os.path.join(tmpd, str(book.id), 'cover.jpg')).unlink_p()
        opff = os.path.join(tmpd, str(book.id), 'content.opf')
        if os.path.exists(opff):
            with open(opff, 'r') as fd:
                soup = BeautifulSoup(fd.read(), ["lxml", "xml"])
            # drop every manifest/reference entry pointing at the cover
            for elem in soup.findAll():
                if getattr(elem, 'attrs', {}).get('href') == 'cover.jpg':
                    elem.decompose()
            with open(opff, 'w') as fd:
                fd.write(str(soup))

    # re-zip: per the ePUB spec, `mimetype` must be the first entry and
    # stored uncompressed (-0), then everything else is added compressed
    with cd(tmpd):
        exec_cmd('zip -q0X "{dst}" mimetype'.format(dst=path_for_cmd(dst)))
        exec_cmd('zip -qXr9D "{dst}" {files}'.format(
            dst=path_for_cmd(dst),
            files=" ".join(
                [f for f in zipped_files if not f == 'mimetype'])))

    path(tmpd).rmtree_p()
# FIXME(review): duplicate definition — an identical optimize_epub already
# exists earlier in this file; this later one shadows it. Keep only one.
def optimize_epub(src, dst):
    """Rebuild the ePUB at `src` into an optimized ePUB at `dst`.

    Extracts the archive to a temp directory, optimizes the images it
    contains (dropping an auto-generated "bad" cover when detected),
    rewrites HTML for static hosting, strips the license block from the
    .ncx table of contents, then re-zips everything.

    NOTE(review): this relies on a module-level `book` for the HTML rewrite
    and for the {id}/cover.jpg and {id}/content.opf paths — confirm it is
    in scope wherever this is called.
    """
    logger.info("\t\tCreating ePUB at {}".format(dst))

    # create temp directory to extract to
    tmpd = tempfile.mkdtemp(dir=TMP_FOLDER)
    with zipfile.ZipFile(src, 'r') as zf:
        zipped_files = zf.namelist()
        zf.extractall(tmpd)

    remove_cover = False
    # iterate over a snapshot: the bad-cover branch removes entries from
    # zipped_files, and mutating a list while iterating it skips elements
    for fname in list(zipped_files):
        fnp = os.path.join(tmpd, fname)
        ext = path(fname).ext

        if ext in ('.png', '.jpeg', '.jpg', '.gif'):
            # special case to remove ugly cover
            if fname.endswith('cover.jpg') and is_bad_cover(fnp):
                zipped_files.remove(fname)
                remove_cover = True
            else:
                optimize_image(path_for_cmd(fnp))

        if ext in ('.htm', '.html'):
            with open(fnp, 'r') as f:
                html = update_html_for_static(book=book,
                                              html_content=f.read(),
                                              epub=True)
            with open(fnp, 'w') as f:
                f.write(html)

        if ext == '.ncx':
            pattern = "*** START: FULL LICENSE ***"
            with open(fnp, 'r') as f:
                soup = BeautifulSoup(f.read(), ["lxml", "xml"])
            for tag in soup.findAll('text'):
                if pattern in tag.text:
                    license_node = tag.parent.parent
                    # materialize siblings BEFORE decomposing anything:
                    # decomposing while iterating the generator (and after
                    # the anchor node itself is gone) skips/breaks nodes
                    trailing = list(license_node.next_siblings)
                    license_node.decompose()
                    for node in trailing:
                        node.decompose()
            with open(fnp, 'w') as f:
                # str(soup), not soup.encode(): encode() returns bytes,
                # which cannot be written to a text-mode file
                f.write(str(soup))

    # delete {id}/cover.jpg if exist and update {id}/content.opf
    if remove_cover:
        # remove cover
        path(os.path.join(tmpd, str(book.id), 'cover.jpg')).unlink_p()
        opff = os.path.join(tmpd, str(book.id), 'content.opf')
        if os.path.exists(opff):
            with open(opff, 'r') as fd:
                soup = BeautifulSoup(fd.read(), ["lxml", "xml"])
            # drop every manifest/reference entry pointing at the cover
            for elem in soup.findAll():
                if getattr(elem, 'attrs', {}).get('href') == 'cover.jpg':
                    elem.decompose()
            with open(opff, 'w') as fd:
                fd.write(str(soup))

    # re-zip: per the ePUB spec, `mimetype` must be the first entry and
    # stored uncompressed (-0), then everything else is added compressed
    with cd(tmpd):
        exec_cmd('zip -q0X "{dst}" mimetype'.format(dst=path_for_cmd(dst)))
        exec_cmd('zip -qXr9D "{dst}" {files}'.format(
            dst=path_for_cmd(dst),
            files=" ".join(
                [f for f in zipped_files if not f == 'mimetype'])))

    path(tmpd).rmtree_p()