def test():  # {{{
    """Smoke-test the image optimisers, the image filters and JxrDecApp.

    Runs the JPEG/PNG optimisers inside a temporary working directory,
    applies every image filter once and finally checks that the external
    ``JxrDecApp`` tool can be started.

    Raises:
        SystemExit: if an optimiser returns anything but ``None``, if
            ``*.bak`` files are left in the working directory, or if the
            ``JxrDecApp -h`` output lacks its banner.
    """
    # TODO(gryf): move this test to separate file.
    from ebook_converter.ptempfile import TemporaryDirectory
    from ebook_converter import CurrentDir
    from glob import glob
    # NOTE(review): ``img`` is not bound inside this function while the line
    # below stays commented out; unless a module-level ``img`` exists the
    # first use raises NameError.
    # TODO(gryf): make the sample image out of pillow or smth
    # img = image_from_data(I('lt.png', data=True, allow_user_override=False))
    with TemporaryDirectory() as tdir, CurrentDir(tdir):
        save_image(img, 'test.jpg')
        ret = optimize_jpeg('test.jpg')
        if ret is not None:
            raise SystemExit('optimize_jpeg failed: %s' % ret)
        ret = encode_jpeg('test.jpg')
        if ret is not None:
            raise SystemExit('encode_jpeg failed: %s' % ret)
        # TODO(gryf): make the sample image out of pillow or smth. for sure
        # tempfile would be better idea.
        # NOTE(review): with the copy below disabled nothing in this function
        # creates 'test.png' before it is optimised.
        # shutil.copyfile(I('lt.png'), 'test.png')
        ret = optimize_png('test.png')
        if ret is not None:
            raise SystemExit('optimize_png failed: %s' % ret)
        if glob('*.bak'):
            raise SystemExit('Spurious .bak files left behind')
    quantize_image(img)
    oil_paint_image(img)
    gaussian_sharpen_image(img)
    gaussian_blur_image(img)
    despeckle_image(img)
    remove_borders_from_image(img)
    image_to_data(img, fmt='GIF')
    # subprocess.run() waits for the child and closes the pipe, so no
    # un-reaped process or open file descriptor is left behind.
    proc = subprocess.run([get_exe_path('JxrDecApp'), '-h'],
                          creationflags=0, stdout=subprocess.PIPE)
    raw = proc.stdout
    if b'JPEG XR Decoder Utility' not in raw:
        raise SystemExit('Failed to run JxrDecApp')
def convert_text(self, oeb_book):
    """Render ``oeb_book`` to PDF at ``self.output_path``.

    The book is first written out as an OEB directory inside a temporary
    folder; the first ``*.opf`` found there is then handed to the HTML based
    PDF writer.  When document margins are enabled, the stored per-file
    margins are attached to the matching manifest roots as a JSON attribute
    beforehand.
    """
    import json
    from ebook_converter.ebooks.pdf.html_writer import convert

    self.get_cover_data()
    self.process_fonts()

    use_doc_margins = (self.opts.pdf_use_document_margins and
                       self.stored_page_margins)
    if use_doc_margins:
        for href, margins in self.stored_page_margins.items():
            manifest_item = oeb_book.manifest.hrefs.get(href)
            if manifest_item is None:
                continue
            root = manifest_item.data
            if not (hasattr(root, 'xpath') and margins):
                continue
            root.set('data-calibre-pdf-output-page-margins',
                     json.dumps(margins))

    with TemporaryDirectory('_pdf_out') as oeb_dir:
        from ebook_converter.customize.ui import plugin_for_output_format
        oeb_dir = os.path.realpath(oeb_dir)
        writer = plugin_for_output_format('oeb')
        writer.convert(oeb_book, oeb_dir, self.input_plugin, self.opts,
                       self.log)
        opf_candidates = glob.glob(os.path.join(oeb_dir, '*.opf'))
        opfpath = opf_candidates[0]
        convert(opfpath, self.opts,
                metadata=self.metadata,
                output_path=self.output_path,
                log=self.log,
                cover_data=self.cover_data,
                report_progress=self.report_progress)
def __enter__(self, *args):
    """
    Add this plugin to the python path so that it's contents become
    directly importable. Useful when bundling large python libraries into
    the plugin. Use it like this::

        with plugin:
            import something

    If the plugin archive contains no compiled extension modules
    (``pyd``/``so``/``dll``/``dylib``), the archive itself is put on
    ``sys.path``. Otherwise it is unpacked into a temporary directory and
    that directory is put on ``sys.path`` instead. Nothing happens when
    ``self.plugin_path`` is ``None``.
    """
    if self.plugin_path is None:
        return

    from ebook_converter.utils.zipfile import ZipFile
    zf = ZipFile(self.plugin_path)
    # try/finally so the archive handle is released even if listing or
    # extracting the archive fails.
    try:
        extensions = {x.rpartition('.')[-1].lower() for x in zf.namelist()}
        zip_safe = extensions.isdisjoint(('pyd', 'so', 'dll', 'dylib'))
        if zip_safe:
            sys.path.insert(0, self.plugin_path)
            self.sys_insertion_path = self.plugin_path
        else:
            from ebook_converter.ptempfile import TemporaryDirectory
            self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
            self.sys_insertion_path = (self._sys_insertion_tdir.
                                       __enter__(*args))
            zf.extractall(self.sys_insertion_path)
            sys.path.insert(0, self.sys_insertion_path)
    finally:
        zf.close()
def render_html_data(path_to_html, width, height):
    """Render an HTML file to JPEG bytes using a forked worker job.

    Returns the content of ``rendered.jpeg`` written by the worker into a
    temporary directory, or ``None`` after printing a failure report when the
    worker raises ``WorkerError`` or reports a falsy result.  ``width`` and
    ``height`` are accepted but not used here.
    """
    from ebook_converter.ptempfile import TemporaryDirectory
    from ebook_converter.utils.ipc.simple_worker import fork_job, WorkerError

    result = {}

    def report_error(text=''):
        # Reads ``result`` from the enclosing scope at call time, so it sees
        # the worker result once fork_job() has returned.
        print(f'Failed to render {path_to_html}')  # file=sys.stderr)
        if text:
            print(text)  # , file=sys.stderr)
        worker_log = result['stdout_stderr'] if result else None
        if worker_log:
            with open(worker_log, 'rb') as stream:
                print(stream.read())  # , file=sys.stderr)

    with TemporaryDirectory('-render-html') as tdir:
        try:
            result = fork_job('ebook_converter.ebooks.render_html', 'main',
                              args=(path_to_html, tdir, 'jpeg'))
        except WorkerError as err:
            report_error(err.orig_tb)
            return None

        if not result['result']:
            report_error()
            return None

        with open(os.path.join(tdir, 'rendered.jpeg'), 'rb') as stream:
            return stream.read()
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a PML or PMLZ stream into HTML pages plus an OPF/NCX pair.

    For ``pmlz`` input the archive is unpacked into a temporary directory
    and every ``*.pml`` file in it becomes one HTML page in the current
    working directory; otherwise the stream is converted to a single
    ``index.html``.  ``metadata.opf`` and ``toc.ncx`` are written to the
    current working directory.

    Returns:
        Path of the generated ``metadata.opf``.
    """
    from ebook_converter.ebooks.metadata.toc import TOC
    from ebook_converter.ebooks.metadata.opf2 import OPFCreator
    from ebook_converter.utils.zipfile import ZipFile

    self.options = options
    self.log = log
    pages = []
    images = []
    toc = TOC()

    if file_ext != 'pmlz':
        toc = self.process_pml(stream, 'index.html')
        pages.append('index.html')
        if hasattr(stream, 'name'):
            source_dir = os.path.abspath(os.path.dirname(stream.name))
            images = self.get_images(stream, source_dir)
    else:
        log.debug('De-compressing content to temporary directory...')
        with TemporaryDirectory('_unpmlz') as tdir:
            archive = ZipFile(stream)
            archive.extractall(tdir)
            for pml in glob.glob(os.path.join(tdir, '*.pml')):
                stem = os.path.splitext(os.path.basename(pml))[0]
                html_name = stem + '.html'
                html_path = os.path.join(os.getcwd(), html_name)
                pages.append(html_name)
                log.debug('Processing PML item %s...', pml)
                toc += self.process_pml(pml, html_path)
            images = self.get_images(stream, tdir, True)

    # We want pages to be ordered alphabetically.
    pages.sort()
    manifest_items = [(entry, None) for entry in pages + images]

    from ebook_converter.ebooks.metadata.meta import get_metadata
    log.debug('Reading metadata from input file...')
    mi = get_metadata(stream, 'pml')
    if 'images/cover.png' in images:
        mi.cover = 'images/cover.png'

    opf = OPFCreator(os.getcwd(), mi)
    log.debug('Generating manifest...')
    opf.create_manifest(manifest_items)
    opf.create_spine(pages)
    opf.set_toc(toc)
    with open('metadata.opf', 'wb') as opffile, \
            open('toc.ncx', 'wb') as tocfile:
        opf.render(opffile, tocfile, 'toc.ncx')

    return os.path.join(os.getcwd(), 'metadata.opf')
def get_metadata(stream, cover=True):
    """Build a ``MetaInformation`` object from a PDF stream.

    The stream is copied to ``src.pdf`` in a temporary directory and handed
    to ``read_info``.  Title, author, producer, tags, ISBN/DOI identifiers
    and (when ``cover`` is true and a ``cover.jpg`` was produced) the cover
    image are taken from the returned info dict.

    Raises:
        ValueError: if ``read_info`` returns ``None``.
    """
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        info = read_info(pdfpath, bool(cover))
        if info is None:
            raise ValueError('Could not read info dict from PDF')
        cover_file = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(cover_file):
            with open(cover_file, 'rb') as src:
                cdata = src.read()

    title = info.get('Title', None) or 'Unknown'
    raw_authors = info.get('Author', None)
    if raw_authors is None:
        authors = ['Unknown']
    else:
        authors = string_to_authors(raw_authors)
    mi = MetaInformation(title, authors)

    producer = info.get('Creator', None)
    if producer:
        mi.book_producer = producer

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [word.strip() for word in keywords.split(',')]
        # Any keyword that validates as an ISBN is moved out of the tags.
        isbn = [check_isbn(tag) for tag in mi.tags if check_isbn(tag)]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if 'xmp_metadata' in info:
        from ebook_converter.ebooks.metadata.xmp import consolidate_metadata
        mi = consolidate_metadata(mi, info)

    # Look for recognizable identifiers in the info dict, if they were not
    # found in the XMP metadata
    checkers = {'doi': check_doi, 'isbn': check_isbn}
    for scheme, check_func in checkers.items():
        if scheme in mi.get_identifiers():
            continue
        for key, value in info.items():
            if key == 'xmp_metadata':
                continue
            found = check_func(value)
            if found:
                mi.set_identifier(scheme, found)
                break

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a CHM stream into an OEB book.

    The CHM is unpacked to HTML in a temporary directory via
    ``self._chmtohtml`` and the resulting HTML tree is turned into an OEB
    book with the recommended options of the HTML input plugin applied to
    ``options``.

    Side effects on ``options``: the HTML plugin's recommended values are
    set, ``input_encoding`` is forced to ``'utf-8'`` and ``debug_pipeline``
    is cleared while the nested HTML conversion runs (it is restored
    afterwards, also when that conversion raises).

    Returns:
        The OEB book built from the extracted HTML.
    """
    # NOTE(gryf): for some reason, those import cannot be moved to the top
    # of module.
    from ebook_converter.ebooks.chm.metadata import get_metadata_from_reader
    from ebook_converter.customize.ui import plugin_for_input_format
    self.opts = options

    log.debug('Processing CHM...')
    with TemporaryDirectory('_chm2oeb') as tdir:
        if not isinstance(tdir, str):
            tdir = tdir.decode(filesystem_encoding)
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        no_images = False  # options.no_images
        chm_name = stream.name
        # chm_data = stream.read()

        # closing stream so CHM can be opened by external library
        stream.close()
        log.debug('tdir=%s', tdir)
        log.debug('stream.name=%s', stream.name)
        debug_dump = False
        odi = options.debug_pipeline
        if odi:
            debug_dump = os.path.join(odi, 'input')
        mainname = self._chmtohtml(tdir, chm_name, no_images, log,
                                   debug_dump=debug_dump)
        mainpath = os.path.join(tdir, mainname)

        try:
            metadata = get_metadata_from_reader(self._chm_reader)
        except Exception:
            log.exception('Failed to read metadata, using filename')
            from ebook_converter.ebooks.metadata.book.base import Metadata
            metadata = Metadata(os.path.basename(chm_name))
        encoding = (self._chm_reader.get_encoding() or
                    options.input_encoding or 'cp1252')
        self._chm_reader.CloseCHM()

        # The debug pipeline is switched off only for the nested HTML
        # conversion; restore it in ``finally`` so a failure there does not
        # leave the caller's options object altered.
        options.debug_pipeline = None
        try:
            options.input_encoding = 'utf-8'
            uenc = encoding
            if (os.path.abspath(mainpath) in
                    self._chm_reader.re_encoded_files):
                uenc = 'utf-8'
            htmlpath, toc = self._create_html_root(mainpath, log, uenc)
            oeb = self._create_oebbook_html(htmlpath, tdir, options, log,
                                            metadata)
        finally:
            options.debug_pipeline = odi

        if toc.count() > 1:
            oeb.toc = self.parse_html_toc(oeb.spine[0])
            oeb.manifest.remove(oeb.spine[0])
            oeb.auto_generated_toc = False
    return oeb
def load_jxr_data(data):
    """Decode JPEG-XR bytes into a ``QImage`` using the JxrDecApp tool.

    ``data`` is written to ``input.jxr`` in a temporary directory,
    ``JxrDecApp`` is run there to produce ``output.tif`` and that file is
    loaded into a ``QImage``.  The tool's exit status is not inspected;
    failure is detected by the image load.

    Raises:
        NotImage: if ``output.tif`` cannot be loaded.
    """
    with TemporaryDirectory() as tdir:
        with open(os.path.join(tdir, 'input.jxr'), 'wb') as f:
            f.write(data)
        # File names are relative because the tool runs with cwd=tdir.
        cmd = [get_exe_path('JxrDecApp'), '-i', 'input.jxr', '-o',
               'output.tif']
        creationflags = 0
        # subprocess.DEVNULL avoids leaking a handle opened on os.devnull.
        subprocess.call(cmd, cwd=tdir, stdout=subprocess.DEVNULL,
                        stderr=subprocess.STDOUT,
                        creationflags=creationflags)
        i = QImage()
        if not i.load(os.path.join(tdir, 'output.tif')):
            raise NotImage('Failed to convert JPEG-XR image')
        return i
def _cover_from_html(self, hcover):
    """Render an HTML cover page and add the result to the manifest.

    The book is written to a temporary directory, the file referenced by
    ``hcover.href`` is rendered, and the rendered bytes (empty bytes when
    rendering yields nothing) are added as a new ``cover.jpg`` style
    manifest item, which is returned.
    """
    from ebook_converter.ebooks import render_html_svg_workaround

    with TemporaryDirectory('_html_cover') as tdir:
        write_book = OEBWriter()
        write_book(self.oeb, tdir)
        cover_page = os.path.join(tdir, unquote(hcover.href))
        rendered = render_html_svg_workaround(cover_page, self.logger) or b''

    manifest = self.oeb.manifest
    cover_id, cover_href = manifest.generate('cover', 'cover.jpg')
    return manifest.add(cover_id, cover_href, base.JPEG_MIME, data=rendered)
def test_shared_file(self):
    """Check that a file opened with ``share_open`` survives removal.

    A reader opened before ``os.remove`` must keep returning the old
    content, while a file re-created under the same name is independent of
    it.  Every handle is closed before the temporary directory is cleaned
    up.
    """
    eq = self.assertEqual

    with TemporaryDirectory() as tdir:
        fname = os.path.join(tdir, 'test.txt')
        with share_open(fname, 'wb') as f:
            f.write(b'a' * 20 * 1024)
            eq(fname, f.name)
        with share_open(fname, 'rb') as f:
            eq(f.read(1), b'a')
            os.remove(fname)
            # The already opened handle still reads the removed file.
            eq(f.read(1), b'a')
            with share_open(fname, 'w+b') as f2:
                f2.write(b'b' * 10 * 1024)
                f2.seek(0)
                eq(f.read(10000), b'a'*10000)
                eq(f2.read(100), b'b' * 100)
                with share_open(fname, 'rb') as f3:
                    eq(f3.read(100), b'b' * 100)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    """Write ``oeb_book`` as a PMLZ archive at ``output_path``.

    The PML text is written to ``index.pml`` and the images to
    ``index_img`` inside a temporary directory, which is then zipped.
    """
    from ebook_converter.ebooks.pml.pmlml import PMLMLizer
    from ebook_converter.utils.zipfile import ZipFile

    with TemporaryDirectory('_pmlz_output') as tdir:
        pmlmlizer = PMLMLizer(log)
        pml = str(pmlmlizer.extract_content(oeb_book, opts))
        with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
            out.write(pml.encode(opts.pml_output_encoding, 'replace'))

        img_path = os.path.join(tdir, 'index_img')
        os.makedirs(img_path, exist_ok=True)
        self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, img_path,
                          opts)

        log.debug('Compressing output...')
        # Close the archive explicitly so it is finalised before the
        # temporary directory goes away, not whenever the object happens to
        # be garbage collected.
        with ZipFile(output_path, 'w') as pmlz:
            pmlz.add_dir(tdir)
def convert(self, oeb, output_path, input_plugin, opts, log):
    """Write ``oeb`` as an LRF file.

    Image collections are handed to ``self.convert_images``.  Everything
    else is first written as OEB into a temporary directory and the first
    ``.opf`` file found there is passed to the LRF HTML converter.
    """
    self.log = log
    self.opts = opts
    self.oeb = oeb

    lrf_opts = LRFOptions(output_path, opts, oeb)

    if input_plugin.is_image_collection:
        image_list = input_plugin.get_images()
        wide = getattr(opts, 'wide', False)
        self.convert_images(image_list, lrf_opts, wide)
        return

    self.flatten_toc()

    from ebook_converter.ptempfile import TemporaryDirectory
    with TemporaryDirectory('_lrf_output') as tdir:
        from ebook_converter.customize.ui import plugin_for_output_format
        oeb_writer = plugin_for_output_format('oeb')
        oeb_writer.convert(oeb, tdir, input_plugin, opts, log)
        opf_names = [name for name in os.listdir(tdir)
                     if name.endswith('.opf')]
        opf = opf_names[0]
        from ebook_converter.ebooks.lrf.html.convert_from import process_file
        process_file(os.path.join(tdir, opf), lrf_opts, self.log)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    """Write ``oeb_book`` as a TXTZ archive at ``output_path``.

    The text file, the book's images and a ``metadata.opf`` are collected
    in a temporary directory, which is then zipped.
    """
    from ebook_converter.ebooks.oeb.base import OEB_IMAGES
    from ebook_converter.utils.zipfile import ZipFile
    from lxml import etree

    with TemporaryDirectory('_txtz_output') as tdir:
        # TXT
        txt_name = 'index.txt'
        if opts.txt_output_formatting.lower() == 'textile':
            txt_name = 'index.text'
        with TemporaryFile(txt_name) as tf:
            # ``tf`` is used as the output target of the plain TXT
            # conversion and then copied into the archive directory.
            TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
            shutil.copy(tf, os.path.join(tdir, txt_name))

        # Images
        for item in oeb_book.manifest:
            if item.media_type not in OEB_IMAGES:
                continue
            if hasattr(self.writer, 'images'):
                # The writer renamed the images: only those it references
                # are stored, under its names.
                if item.href not in self.writer.images:
                    continue
                path = os.path.join(tdir, 'images')
                href = self.writer.images[item.href]
            else:
                path = os.path.join(tdir, os.path.dirname(item.href))
                href = os.path.basename(item.href)
            os.makedirs(path, exist_ok=True)
            with open(os.path.join(path, href), 'wb') as imgf:
                imgf.write(item.data)

        # Metadata
        with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
            mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))

        # Close the archive explicitly so it is finalised before the
        # temporary directory is removed.
        with ZipFile(output_path, 'w') as txtz:
            txtz.add_dir(tdir)
def run(self, htmlfile):
    """Pack an HTML file and everything it pulls in into a ZIP container.

    The HTML input stage of the conversion pipeline is run with its debug
    dump directed at a temporary directory; the dumped ``input`` folder
    (minus any ``*.ncx``) is then added to a new container created in a
    plugin temporary file.

    ``self.site_customization`` may hold ``"<encoding>|bf"``: the part
    before ``|`` is an input encoding (ignored when unknown to ``codecs``),
    and ``bf`` after it enables breadth-first traversal.

    Returns:
        Path of the generated ZIP file.
    """
    import codecs
    from ebook_converter.ptempfile import TemporaryDirectory
    from ebook_converter.gui2.convert.gui_conversion import gui_convert
    from ebook_converter.customize.conversion import OptionRecommendation
    from ebook_converter.ebooks.epub import initialize_container

    with TemporaryDirectory('_plugin_html2zip') as tdir:
        recs = [('debug_pipeline', tdir, OptionRecommendation.HIGH)]
        recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
        if self.site_customization and self.site_customization.strip():
            sc = self.site_customization.strip()
            enc, _, bf = sc.partition('|')
            if enc:
                try:
                    codecs.lookup(enc)
                except Exception:
                    # print() does not interpolate, so format explicitly.
                    print('Ignoring invalid input encoding for HTML: %s'
                          % enc)
                else:
                    recs.append(['input_encoding', enc,
                                 OptionRecommendation.HIGH])
            if bf == 'bf':
                recs.append(['breadth_first', True,
                             OptionRecommendation.HIGH])
        gui_convert(htmlfile, tdir, recs, abort_after_input_dump=True)
        of = self.temporary_file('_plugin_html2zip.zip')
        tdir = os.path.join(tdir, 'input')
        opf = glob.glob(os.path.join(tdir, '*.opf'))[0]
        ncx = glob.glob(os.path.join(tdir, '*.ncx'))
        if ncx:
            os.remove(ncx[0])
        epub = initialize_container(of.name, os.path.basename(opf))
        epub.add_dir(tdir)
        epub.close()

    return of.name
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    """Write ``oeb_book`` as an SNB file at ``output_path``.

    Steps, all performed inside one temporary directory:

    1. rasterize SVG (skipped with a warning when unavailable),
    2. write ``snbf/book.snbf`` with the book metadata,
    3. write ``snbf/toc.snbf`` and work out which spine files start a
       chapter (``outputFiles``),
    4. convert every spine document to ``snbc/*.snbc``, merging documents
       that are not referenced by the TOC into the preceding chapter,
    5. convert the images into ``snbc/images``,
    6. package the directory with ``SNBFile``.
    """
    from lxml import etree
    from ebook_converter.ebooks.snb.snbfile import SNBFile
    from ebook_converter.ebooks.snb.snbml import SNBMLizer, ProcessFileName

    self.opts = opts
    from ebook_converter.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
    try:
        rasterizer = SVGRasterizer()
        rasterizer(oeb_book, opts)
    except Unavailable:
        log.warn('SVG rasterizer unavailable, SVG will not be converted')

    # Create temp dir
    with TemporaryDirectory('_snb_output') as tdir:
        # Create stub directories
        snbfDir = os.path.join(tdir, 'snbf')
        snbcDir = os.path.join(tdir, 'snbc')
        snbiDir = os.path.join(tdir, 'snbc/images')
        os.mkdir(snbfDir)
        os.mkdir(snbcDir)
        os.mkdir(snbiDir)

        # Process Meta data (only the first entry of each field is used)
        meta = oeb_book.metadata
        if meta.title:
            title = str(meta.title[0])
        else:
            title = ''
        authors = [str(x) for x in meta.creator if x.role == 'aut']
        if meta.publisher:
            publishers = str(meta.publisher[0])
        else:
            publishers = ''
        if meta.language:
            lang = str(meta.language[0]).upper()
        else:
            lang = ''
        if meta.description:
            abstract = str(meta.description[0])
        else:
            abstract = ''

        # Process Cover: the guide's cover is only referenced when the guide
        # has no 'titlepage' entry.
        g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine
        href = None
        if 'titlepage' not in g:
            if 'cover' in g:
                href = g['cover'].href

        # Output book info file
        bookInfoTree = etree.Element("book-snbf", version="1.0")
        headTree = etree.SubElement(bookInfoTree, "head")
        etree.SubElement(headTree, "name").text = title
        etree.SubElement(headTree, "author").text = ' '.join(authors)
        etree.SubElement(headTree, "language").text = lang
        etree.SubElement(headTree, "rights")
        etree.SubElement(headTree, "publisher").text = publishers
        etree.SubElement(
            headTree, "generator").text = __appname__ + ' ' + __version__
        etree.SubElement(headTree, "created")
        etree.SubElement(headTree, "abstract").text = abstract
        if href is not None:
            etree.SubElement(headTree, "cover").text = ProcessFileName(href)
        else:
            etree.SubElement(headTree, "cover")
        with open(os.path.join(snbfDir, 'book.snbf'), 'wb') as f:
            f.write(
                etree.tostring(bookInfoTree, pretty_print=True,
                               encoding='utf-8'))

        # Output TOC
        tocInfoTree = etree.Element("toc-snbf")
        tocHead = etree.SubElement(tocInfoTree, "head")
        tocBody = etree.SubElement(tocInfoTree, "body")
        # Maps a spine href to the list of (fragment, title) pairs of the
        # TOC entries that point into it.
        outputFiles = {}
        if oeb_book.toc.count() == 0:
            log.warn('This SNB file has no Table of Contents. '
                     'Creating a default TOC')
            first = next(iter(oeb_book.spine))
            oeb_book.toc.add('Start page', first.href)
        else:
            first = next(iter(oeb_book.spine))
            if oeb_book.toc[0].href != first.href:
                # The pages before the first item in toc will be stored as
                # "Cover Pages".
                # oeb_book.toc does not support "insert", so we generate
                # the tocInfoTree directly instead of modifying the toc
                ch = etree.SubElement(tocBody, "chapter")
                ch.set("src", ProcessFileName(first.href) + ".snbc")
                ch.text = 'Cover pages'
                outputFiles[first.href] = []
                outputFiles[first.href].append(("", "Cover pages"))

        for tocitem in oeb_book.toc:
            if tocitem.href.find('#') != -1:
                # TOC entry with a fragment: "file#anchor"
                item = tocitem.href.split('#')
                if len(item) != 2:
                    log.error('Error in TOC item: %s' % tocitem)
                else:
                    if item[0] in outputFiles:
                        outputFiles[item[0]].append(
                            (item[1], tocitem.title))
                    else:
                        outputFiles[item[0]] = []
                        # NOTE(review): the list was created empty on the
                        # line above, so this condition always holds here.
                        if "" not in outputFiles[item[0]]:
                            outputFiles[item[0]].append(
                                ("", tocitem.title + " (Preface)"))
                            ch = etree.SubElement(tocBody, "chapter")
                            ch.set("src", ProcessFileName(item[0]) + ".snbc")
                            ch.text = tocitem.title + " (Preface)"
                        outputFiles[item[0]].append(
                            (item[1], tocitem.title))
            else:
                if tocitem.href in outputFiles:
                    outputFiles[tocitem.href].append(("", tocitem.title))
                else:
                    # First TOC entry for this file: it becomes a chapter.
                    outputFiles[tocitem.href] = []
                    outputFiles[tocitem.href].append(("", tocitem.title))
                    ch = etree.SubElement(tocBody, "chapter")
                    ch.set("src", ProcessFileName(tocitem.href) + ".snbc")
                    ch.text = tocitem.title

        etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody)

        with open(os.path.join(snbfDir, 'toc.snbf'), 'wb') as f:
            f.write(
                etree.tostring(tocInfoTree, pretty_print=True,
                               encoding='utf-8'))

        # Output Files
        oldTree = None      # tree of the most recently written chapter
        mergeLast = False   # True while documents are merged into oldTree
        lastName = None     # file name oldTree was written to
        for item in s:
            from ebook_converter.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES
            if m.hrefs[item.href].media_type in OEB_DOCS:
                if item.href not in outputFiles:
                    log.debug(
                        'File %s is unused in TOC. Continue in last chapter'
                        % item.href)
                    mergeLast = True
                else:
                    # A new chapter starts: flush the previous one again if
                    # documents were merged into it since it was written.
                    if oldTree is not None and mergeLast:
                        log.debug('Output the modified chapter again: %s' %
                                  lastName)
                        with open(os.path.join(snbcDir, lastName),
                                  'wb') as f:
                            f.write(
                                etree.tostring(oldTree, pretty_print=True,
                                               encoding='utf-8'))
                    mergeLast = False

                log.debug('Converting %s to snbc...' % item.href)
                snbwriter = SNBMLizer(log)
                snbcTrees = None
                if not mergeLast:
                    snbcTrees = snbwriter.extract_content(
                        oeb_book, item, outputFiles[item.href], opts)
                    for subName in snbcTrees:
                        postfix = ''
                        if subName != '':
                            postfix = '_' + subName
                        lastName = ProcessFileName(item.href + postfix +
                                                   ".snbc")
                        oldTree = snbcTrees[subName]
                        with open(os.path.join(snbcDir, lastName),
                                  'wb') as f:
                            f.write(
                                etree.tostring(oldTree, pretty_print=True,
                                               encoding='utf-8'))
                else:
                    log.debug('Merge %s with last TOC item...' % item.href)
                    # NOTE(review): oldTree is still None here when the very
                    # first spine document is not referenced by the TOC.
                    snbwriter.merge_content(oldTree, oeb_book, item,
                                            [('', "Start")], opts)

        # Output the last one if needed
        # NOTE(review): this message is logged even when nothing is
        # rewritten below.
        log.debug('Output the last modified chapter again: %s' % lastName)
        if oldTree is not None and mergeLast:
            with open(os.path.join(snbcDir, lastName), 'wb') as f:
                f.write(
                    etree.tostring(oldTree, pretty_print=True,
                                   encoding='utf-8'))
            mergeLast = False

        for item in m:
            if m.hrefs[item.href].media_type in OEB_IMAGES:
                log.debug('Converting image: %s ...' % item.href)
                content = m.hrefs[item.href].data
                # Convert & Resize image
                self.HandleImage(
                    content,
                    os.path.join(snbiDir, ProcessFileName(item.href)))

        # Package as SNB File
        snbFile = SNBFile()
        snbFile.FromDir(tdir)
        snbFile.Output(output_path)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    """Write ``oeb_book`` as an HTMLZ archive at ``output_path``.

    The HTML file, an optional external stylesheet, the referenced images,
    an optional ``cover.jpg`` and a ``metadata.opf`` are collected in a
    temporary directory, which is then zipped.  ``opts.htmlz_css_type``
    selects the HTML generator (``inline``, ``tag`` or class based).  A
    failure while extracting the cover is printed and otherwise ignored.
    """
    from lxml import etree
    from ebook_converter.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
    from ebook_converter.ebooks.metadata.opf2 import OPF, metadata_to_opf
    from ebook_converter.utils.zipfile import ZipFile
    from ebook_converter.utils.filenames import ascii_filename

    # HTML
    if opts.htmlz_css_type == 'inline':
        from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
        OEB2HTMLizer = OEB2HTMLInlineCSSizer
    elif opts.htmlz_css_type == 'tag':
        from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
        OEB2HTMLizer = OEB2HTMLNoCSSizer
    else:
        from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

    with TemporaryDirectory('_htmlz_output') as tdir:
        htmlizer = OEB2HTMLizer(log)
        html = htmlizer.oeb2html(oeb_book, opts)

        fname = 'index'
        if opts.htmlz_title_filename:
            from ebook_converter.utils.filenames import shorten_components_to
            fname = shorten_components_to(
                100, (ascii_filename(str(oeb_book.metadata.title[0])),))[0]
        with open(os.path.join(tdir, fname + '.html'), 'wb') as tf:
            if isinstance(html, str):
                html = html.encode('utf-8')
            tf.write(html)

        # CSS
        if (opts.htmlz_css_type == 'class' and
                opts.htmlz_class_style == 'external'):
            css = htmlizer.get_css(oeb_book)
            # Same str/bytes handling as for the HTML above, since the file
            # is opened in binary mode.
            if isinstance(css, str):
                css = css.encode('utf-8')
            with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
                tf.write(css)

        # Images
        images = htmlizer.images
        if images:
            images_dir = os.path.join(tdir, 'images')
            os.makedirs(images_dir, exist_ok=True)
            for item in oeb_book.manifest:
                if item.media_type in OEB_IMAGES and item.href in images:
                    if item.media_type == SVG_MIME:
                        data = etree.tostring(item.data, encoding='unicode')
                    else:
                        data = item.data
                    # SVG is serialised to text; encode it before writing
                    # to the binary file, otherwise write() raises TypeError.
                    if isinstance(data, str):
                        data = data.encode('utf-8')
                    image_path = os.path.join(images_dir, images[item.href])
                    with open(image_path, 'wb') as img:
                        img.write(data)

        # Cover
        cover_path = None
        try:
            cover_data = None
            if oeb_book.metadata.cover:
                term = oeb_book.metadata.cover[0].term
                cover_data = oeb_book.guide[term].item.data
            if cover_data:
                from ebook_converter.utils.img import save_cover_data_to
                cover_path = os.path.join(tdir, 'cover.jpg')
                # Create the (empty) target file before saving into it.
                with open(cover_path, 'w') as cf:
                    cf.write('')
                save_cover_data_to(cover_data, cover_path)
        except Exception:
            # Best effort: a broken cover must not abort the conversion, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            import traceback
            traceback.print_exc()

        # Metadata
        with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
            opf = OPF(io.BytesIO(etree.tostring(oeb_book.metadata.to_opf1(),
                                                encoding='UTF-8')))
            mi = opf.to_book_metadata()
            if cover_path:
                mi.cover = 'cover.jpg'
            mdataf.write(metadata_to_opf(mi))

        # Close the archive explicitly so it is finalised before the
        # temporary directory is removed.
        with ZipFile(output_path, 'w') as htmlz:
            htmlz.add_dir(tdir)
def convert(self, oeb, output_path, input_plugin, opts, log):
    """Write ``oeb`` as an EPUB file at ``output_path``.

    Applies the EPUB specific transforms (inline TOC, file name
    flattening/uniquifying, reader workarounds, image rescaling, splitting,
    cover handling), makes sure the book has a UUID identifier, writes the
    book as OEB into a temporary directory and packs that directory into the
    EPUB container.  When ``opts.extract_to`` is set, the finished EPUB is
    additionally unpacked there, replacing whatever existed at that path.
    """
    self.log, self.opts, self.oeb = log, opts, oeb

    if self.opts.epub_inline_toc:
        from ebook_converter.ebooks.mobi.writer8.toc import TOCAdder
        opts.mobi_toc_at_start = not opts.epub_toc_at_end
        opts.mobi_passthrough = False
        opts.no_inline_toc = False
        TOCAdder(oeb, opts, replace_previous_inline_toc=True,
                 ignore_existing_toc=True)

    if self.opts.epub_flatten:
        from ebook_converter.ebooks.oeb.transforms.filenames import FlatFilenames
        FlatFilenames()(oeb, opts)
    else:
        from ebook_converter.ebooks.oeb.transforms.filenames import UniqueFilenames
        UniqueFilenames()(oeb, opts)

    self.workaround_ade_quirks()
    self.workaround_webkit_quirks()
    self.upshift_markup()
    from ebook_converter.ebooks.oeb.transforms.rescale import RescaleImages
    RescaleImages(check_colorspaces=True)(oeb, opts)

    from ebook_converter.ebooks.oeb.transforms.split import Split
    split = Split(not self.opts.dont_split_on_page_breaks,
                  max_flow_size=self.opts.flow_size*1024)
    split(self.oeb, self.opts)

    from ebook_converter.ebooks.oeb.transforms.cover import CoverManager
    cm = CoverManager(
        no_default_cover=self.opts.no_default_epub_cover,
        no_svg_cover=self.opts.no_svg_cover,
        preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
    cm(self.oeb, self.opts, self.log)

    self.workaround_sony_quirks()

    if self.oeb.toc.count() == 0:
        self.log.warn('This EPUB file has no Table of Contents. '
                      'Creating a default TOC')
        first = next(iter(self.oeb.spine))
        self.oeb.toc.add('Start', first.href)

    identifiers = oeb.metadata['identifier']
    _uuid = None
    for x in identifiers:
        # An identifier may carry no scheme attribute at all; treat that as
        # an empty scheme instead of calling .lower() on None.
        scheme = x.get(base.tag('opf', 'scheme'), None) or ''
        if scheme.lower() == 'uuid' or str(x).startswith('urn:uuid:'):
            _uuid = str(x).split(':')[-1]
            break
    encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])

    if _uuid is None:
        self.log.warn('No UUID identifier found')
        _uuid = str(uuid.uuid4())
        oeb.metadata.add('identifier', _uuid, scheme='uuid', id=_uuid)

    if encrypted_fonts and not _uuid.startswith('urn:uuid:'):
        # Apparently ADE requires this value to start with urn:uuid:
        # for some absurd reason, or it will throw a hissy fit and refuse
        # to use the obfuscated fonts.
        for x in identifiers:
            if str(x) == _uuid:
                x.content = 'urn:uuid:' + _uuid

    with TemporaryDirectory('_epub_output') as tdir:
        from ebook_converter.customize.ui import plugin_for_output_format
        metadata_xml = None
        extra_entries = []
        if self.is_periodical:
            if self.opts.output_profile.epub_periodical_format == 'sony':
                from ebook_converter.ebooks.epub.periodical import sony_metadata
                metadata_xml, atom_xml = sony_metadata(oeb)
                extra_entries = [('atom.xml', 'application/atom+xml',
                                  atom_xml)]
        oeb_output = plugin_for_output_format('oeb')
        oeb_output.convert(oeb, tdir, input_plugin, opts, log)
        opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
        self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)
                           if x.endswith('.ncx')][0])
        if self.opts.epub_version == '3':
            self.upgrade_to_epub3(tdir, opf)
        encryption = None
        if encrypted_fonts:
            encryption = self.encrypt_fonts(encrypted_fonts, tdir, _uuid)

        from ebook_converter.ebooks.epub import initialize_container
        with initialize_container(output_path, os.path.basename(opf),
                                  extra_entries=extra_entries) as epub:
            epub.add_dir(tdir)
            if encryption is not None:
                epub.writestr('META-INF/encryption.xml',
                              as_bytes(encryption))
            if metadata_xml is not None:
                epub.writestr('META-INF/metadata.xml',
                              metadata_xml.encode('utf-8'))
        if opts.extract_to is not None:
            from ebook_converter.utils.zipfile import ZipFile
            # Whatever is at the target path is removed first.
            if os.path.exists(opts.extract_to):
                if os.path.isdir(opts.extract_to):
                    shutil.rmtree(opts.extract_to)
                else:
                    os.remove(opts.extract_to)
            os.mkdir(opts.extract_to)
            with ZipFile(output_path) as zf:
                zf.extractall(path=opts.extract_to)
            # %-style placeholder, like the other log calls that take
            # arguments; without it the path is never interpolated.
            self.log.info('EPUB extracted to %s', opts.extract_to)
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an SNB stream into an OEB book.

    Metadata is read from ``snbf/book.snbf``, the chapter list from
    ``snbf/toc.snbf``; every chapter whose ``snbc`` file exists is written
    as ``ch_<n>.htm`` into a temporary directory that is kept (it becomes
    the book's container), and the image files are unpacked next to them.

    Raises:
        ValueError: if the stream cannot be parsed as, or is not a valid,
            SNB file.

    Returns:
        The populated OEB book.
    """
    import uuid
    from ebook_converter.ebooks.oeb.base import DirContainer
    from ebook_converter.ebooks.snb.snbfile import SNBFile

    log.debug("Parsing SNB file...")
    snbFile = SNBFile()
    try:
        snbFile.Parse(stream)
    except Exception as err:
        # Keep the parser's error as the cause for easier debugging.
        raise ValueError("Invalid SNB file") from err
    if not snbFile.IsValid():
        log.debug("Invalid SNB file")
        raise ValueError("Invalid SNB file")
    log.debug("Handle meta data ...")
    from ebook_converter.ebooks.conversion.plumber import create_oebbook
    oeb = create_oebbook(log, None, options,
                         encoding=options.input_encoding, populate=False)
    meta = snbFile.GetFileStream('snbf/book.snbf')
    if meta is not None:
        meta = etree.fromstring(meta)
        item_map = {'title': './/head/name',
                    'creator': './/head/author',
                    'language': './/head/language',
                    'generator': './/head/generator',
                    'publisher': './/head/publisher',
                    'cover': './/head/cover'}
        d = {}
        for key, item in item_map.items():
            node = meta.find(item)
            # Missing nodes and empty nodes both become ''.
            if node is not None:
                d[key] = node.text if node.text is not None else ''
            else:
                d[key] = ''

        oeb.metadata.add('title', d['title'])
        oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
        oeb.metadata.add('language',
                         d['language'].lower().replace('_', '-'))
        oeb.metadata.add('generator', d['generator'])
        oeb.metadata.add('publisher', d['publisher'])
        if d['cover'] != '':
            oeb.guide.add('cover', 'Cover', d['cover'])

    bookid = str(uuid.uuid4())
    oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
    for ident in oeb.metadata.identifier:
        if 'id' in ident.attrib:
            # NOTE(review): this picks the first identifier, not ``ident``;
            # kept as is - confirm whether that is intended.
            oeb.uid = oeb.metadata.identifier[0]
            break

    with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
        log.debug('Process TOC ...')
        toc = snbFile.GetFileStream('snbf/toc.snbf')
        oeb.container = DirContainer(tdir, log)
        if toc is not None:
            toc = etree.fromstring(toc)
            i = 1
            for ch in toc.find('.//body'):
                chapterName = ch.text
                chapterSrc = ch.get('src')
                fname = 'ch_%d.htm' % i
                data = snbFile.GetFileStream('snbc/' + chapterSrc)
                if data is None:
                    # Chapter file missing: skipped without consuming a
                    # chapter number.
                    continue
                snbc = etree.fromstring(data)
                lines = []
                for line in snbc.find('.//body'):
                    # Empty elements have ``text`` set to None; encode them
                    # as an empty string instead of passing None on.
                    if line.tag == 'text':
                        lines.append('<p>%s</p>' %
                                     html_encode(line.text or ''))
                    elif line.tag == 'img':
                        lines.append('<p><img src="%s" /></p>' %
                                     html_encode(line.text or ''))
                with open(os.path.join(tdir, fname), 'wb') as f:
                    f.write((HTML_TEMPLATE %
                             (chapterName,
                              '\n'.join(lines))).encode('utf-8', 'replace'))
                oeb.toc.add(ch.text, fname)
                item_id, href = oeb.manifest.generate(
                    id='html', href=ascii_filename(fname))
                item = oeb.manifest.add(item_id, href, 'text/html')
                item.html_input_href = fname
                oeb.spine.add(item, True)
                i = i + 1
            imageFiles = snbFile.OutputImageFiles(tdir)
            for f, m in imageFiles:
                item_id, href = oeb.manifest.generate(
                    id='image', href=ascii_filename(f))
                item = oeb.manifest.add(item_id, href, m)
                item.html_input_href = f

    return oeb