def import_book_as_epub(srcpath, destpath, log=default_log):
    '''
    Import the book at ``srcpath`` and write it out as an EPUB at ``destpath``.

    The input plugin for the source format is run into a temporary directory,
    the result is loaded into a ``Container``, its manifest is auto-filled,
    every file whose media type is in ``OEB_DOCS`` or ``OEB_STYLES`` is parsed
    and marked dirty before the container is committed, and finally all files
    of the container are written into a freshly initialized EPUB.

    :param srcpath: Path to the book to import.
    :param destpath: Path of the EPUB file to create. Must end with ``.epub``
        (compared case-insensitively).
    :param log: Log object handed to the conversion plumber and the container.
    :raises ValueError: If ``destpath`` does not end with ``.epub``.
    '''
    if not destpath.lower().endswith('.epub'):
        raise ValueError('Can only import books into the EPUB format, not %s' % (os.path.basename(destpath)))
    with TemporaryDirectory('eei') as tdir:
        # Needed to handle the multiple levels of symlinks for /tmp on OS X
        tdir = os.path.abspath(os.path.realpath(tdir))
        plumber = Plumber(srcpath, tdir, log)
        plumber.setup_options()
        if srcpath.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            pathtoopf = plumber.input_plugin(inf, plumber.opts, plumber.input_fmt, log, {}, tdir)
        if hasattr(pathtoopf, 'manifest'):
            # The plugin returned an object with a manifest rather than a
            # path; write it out to get an OPF path for the container
            from calibre.ebooks.oeb.iterator.book import write_oebbook
            pathtoopf = write_oebbook(pathtoopf, tdir)

        c = Container(tdir, pathtoopf, log)
        auto_fill_manifest(c)
        # Auto fix all HTML/CSS
        # The set of fixable media types does not change during the loop, so
        # build it once instead of once per manifest entry
        fixable_types = set(OEB_DOCS) | set(OEB_STYLES)
        for name, mt in iteritems(c.mime_map):
            if mt in fixable_types:
                c.parsed(name)
                c.dirty(name)
        c.commit()

        zf = initialize_container(destpath, opf_name=c.opf_name)
        with zf:
            for name in c.name_path_map:
                zf.writestr(name, c.raw_data(name, decode=False))
def import_book_as_epub(srcpath, destpath, log=default_log):
    '''
    Import the book at ``srcpath`` and write it out as an EPUB at ``destpath``.

    The input plugin for the source format is run into a temporary directory,
    the result is loaded into a ``Container``, its manifest is auto-filled,
    every file whose media type is in ``OEB_DOCS`` or ``OEB_STYLES`` is parsed
    and marked dirty before the container is committed, and finally all files
    of the container are written into a freshly initialized EPUB.

    :param srcpath: Path to the book to import.
    :param destpath: Path of the EPUB file to create. Must end with ``.epub``
        (compared case-insensitively).
    :param log: Log object handed to the conversion plumber and the container.
    :raises ValueError: If ``destpath`` does not end with ``.epub``.
    '''
    if not destpath.lower().endswith('.epub'):
        raise ValueError('Can only import books into the EPUB format, not %s' % (os.path.basename(destpath)))
    with TemporaryDirectory('eei') as tdir:
        # Needed to handle the multiple levels of symlinks for /tmp on OS X
        tdir = os.path.abspath(os.path.realpath(tdir))
        plumber = Plumber(srcpath, tdir, log)
        plumber.setup_options()
        if srcpath.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            pathtoopf = plumber.input_plugin(inf, plumber.opts, plumber.input_fmt, log, {}, tdir)
        if hasattr(pathtoopf, 'manifest'):
            # The plugin returned an object with a manifest rather than a
            # path; write it out to get an OPF path for the container
            from calibre.ebooks.oeb.iterator.book import write_oebbook
            pathtoopf = write_oebbook(pathtoopf, tdir)

        c = Container(tdir, pathtoopf, log)
        auto_fill_manifest(c)
        # Auto fix all HTML/CSS
        # The set of fixable media types does not change during the loop, so
        # build it once instead of once per manifest entry
        fixable_types = set(OEB_DOCS) | set(OEB_STYLES)
        # .items() rather than .iteritems(): the latter only exists on
        # Python 2 dicts, while .items() works on both Python 2 and 3
        for name, mt in c.mime_map.items():
            if mt in fixable_types:
                c.parsed(name)
                c.dirty(name)
        c.commit()

        zf = initialize_container(destpath, opf_name=c.opf_name)
        with zf:
            for name in c.name_path_map:
                zf.writestr(name, c.raw_data(name, decode=False))
def extract_book(pathtoebook, tdir, log=None, view_kepub=False, processed=False, only_input_plugin=False):
    '''
    Run the input plugin for ``pathtoebook`` with ``tdir`` as output directory.

    Unless ``only_input_plugin`` is true, the plugin output is additionally
    passed through ``create_oebbook`` when ``processed`` is true, or when the
    input format is one of pdb, pdf or rb and the plugin output has no
    ``manifest`` attribute. A result that has a ``manifest`` attribute is
    written out with ``write_oebbook``.

    :param log: Log object to use; ``default_log`` is used when falsy.
    :param view_kepub: Forwarded to ``Plumber``.
    :return: ``(book_format, pathtoopf, input_fmt)`` where ``book_format`` is
        the upper-cased file extension of ``pathtoebook``, or ``'KF8'`` /
        ``'KF8:joint'`` when the input plugin reports ``is_kf8``.
    '''
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    from calibre.utils.logging import default_log
    if not log:
        log = default_log

    pipeline = Plumber(pathtoebook, tdir, log, view_kepub=view_kepub)
    pipeline.setup_options()
    options = pipeline.opts
    if pathtoebook.lower().endswith('.opf'):
        options.dont_package = True
    if hasattr(options, 'no_process'):
        options.no_process = True
    pipeline.input_plugin.for_viewer = True

    with pipeline.input_plugin, open(pipeline.input, 'rb') as stream:
        pathtoopf = pipeline.input_plugin(
            stream, pipeline.opts, pipeline.input_fmt, log, {}, tdir)

        # Run the HTML preprocess/parsing from the conversion pipeline as
        # well, unless the caller asked for the input plugin only
        run_pipeline = not only_input_plugin and (
            processed or (
                pipeline.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                not hasattr(pathtoopf, 'manifest')
            )
        )
        if run_pipeline:
            if hasattr(pathtoopf, 'manifest'):
                pathtoopf = write_oebbook(pathtoopf, tdir)
            pathtoopf = create_oebbook(log, pathtoopf, pipeline.opts)

        if hasattr(pathtoopf, 'manifest'):
            pathtoopf = write_oebbook(pathtoopf, tdir)

    extension = os.path.splitext(pathtoebook)[1]
    book_format = extension[1:].upper()
    if getattr(pipeline.input_plugin, 'is_kf8', False):
        if getattr(pipeline.input_plugin, 'mobi_is_joint', False):
            book_format = 'KF8' + ':joint'
        else:
            book_format = 'KF8' + ''
    return book_format, pathtoopf, pipeline.input_fmt
def __enter__(self, processed=False, only_input_plugin=False,
        run_char_count=True, read_anchor_map=True,
        extract_embedded_fonts_for_qt=False):
    ''' Convert an ebook file into an exploded OEB book suitable for
    display in viewers/preprocessing etc.

    The book is exploded into a new temporary directory (``self.base``) and
    ``self.opf``, ``self.language``, ``self.spine``, ``self.pages``,
    ``self.toc`` and ``self.book_format`` are populated from it.

    :param processed: If true, also run ``create_oebbook`` on the output of
        the input plugin.
    :param only_input_plugin: If true, never run ``create_oebbook``.
    :param run_char_count: Forwarded to every ``SpineItem``.
    :param read_anchor_map: Forwarded to every ``SpineItem``; when true,
        ``create_indexing_data`` is also run on the spine and the ToC.
    :param extract_embedded_fonts_for_qt: If true, run ``extract_fonts`` on
        the OPF. A failure there is logged and not propagated.
    :return: ``self``
    '''
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    self.delete_on_exit = []
    self._tdir = TemporaryDirectory('_ebook_iter')
    self.base = self._tdir.__enter__()
    plumber = Plumber(self.pathtoebook, self.base, self.log)
    plumber.setup_options()
    if self.pathtoebook.lower().endswith('.opf'):
        plumber.opts.dont_package = True
    if hasattr(plumber.opts, 'no_process'):
        plumber.opts.no_process = True
    plumber.input_plugin.for_viewer = True
    with plumber.input_plugin, open(plumber.input, 'rb') as inf:
        self.pathtoopf = plumber.input_plugin(inf,
            plumber.opts, plumber.input_fmt, self.log,
            {}, self.base)

        if not only_input_plugin:
            # Run the HTML preprocess/parsing from the conversion pipeline as
            # well
            if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                    not hasattr(self.pathtoopf, 'manifest')):
                if hasattr(self.pathtoopf, 'manifest'):
                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
                self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts)

        if hasattr(self.pathtoopf, 'manifest'):
            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

    self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
    if getattr(plumber.input_plugin, 'is_kf8', False):
        self.book_format = 'KF8'

    # Prefer an OPF object supplied by the input plugin, if it has one
    self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
    if self.opf is None:
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
    self.language = self.opf.language
    if self.language:
        self.language = self.language.lower()
    # Linear spine items first, non-linear ones after them
    ordered = [i for i in self.opf.spine if i.is_linear] + \
              [i for i in self.opf.spine if not i.is_linear]
    self.spine = []
    Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
            run_char_count=run_char_count)
    is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
    for i in ordered:
        spath = i.path
        mt = None
        if i.idref is not None:
            mt = self.opf.manifest.type_for_id(i.idref)
        if mt is None:
            mt = guess_type(spath)[0]
        try:
            self.spine.append(Spiny(spath, mime_type=mt))
            if is_comic:
                self.spine[-1].is_single_page = True
        except Exception:
            # Best effort: skip items that cannot be loaded, but let
            # KeyboardInterrupt/SystemExit propagate
            self.log.warn('Missing spine item:', repr(spath))

    cover = self.opf.cover
    if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                    'azw', 'azw3'}:
        # Generate a title page for the cover and put it first in the spine
        cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
        rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
        chtml = (TITLEPAGE % prepare_string_for_xml(rcpath, True)).encode('utf-8')
        with open(cfile, 'wb') as f:
            f.write(chtml)
        self.spine[0:0] = [Spiny(cfile,
            mime_type='application/xhtml+xml')]
        self.delete_on_exit.append(cfile)

    if self.opf.path_to_html_toc is not None and \
       self.opf.path_to_html_toc not in self.spine:
        try:
            self.spine.append(Spiny(self.opf.path_to_html_toc))
        except Exception:
            import traceback
            traceback.print_exc()

    sizes = [i.character_count for i in self.spine]
    self.pages = [math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes]
    for p, s in zip(self.pages, self.spine):
        s.pages = p
    # Assign consecutive page ranges to the spine items, starting at page 1
    start = 1
    for s in self.spine:
        s.start_page = start
        start += s.pages
        s.max_page = s.start_page + s.pages - 1
    self.toc = self.opf.toc
    if read_anchor_map:
        create_indexing_data(self.spine, self.toc)

    self.read_bookmarks()

    if extract_embedded_fonts_for_qt:
        from calibre.ebooks.oeb.iterator.extract_fonts import extract_fonts
        try:
            extract_fonts(self.opf, self.log)
        except Exception:
            # Log the failure with the filter level temporarily set to
            # DEBUG, always restoring the previous level afterwards
            ol = self.log.filter_level
            self.log.filter_level = self.log.DEBUG
            try:
                self.log.exception('Failed to extract fonts')
            finally:
                self.log.filter_level = ol

    return self
def __enter__(self, processed=False, only_input_plugin=False,
        run_char_count=True, read_anchor_map=True, view_kepub=False,
        read_links=True):
    ''' Convert an ebook file into an exploded OEB book suitable for
    display in viewers/preprocessing etc.

    The book is exploded into a new temporary directory (``self.base``) and
    ``self.opf``, ``self.language``, ``self.spine``, ``self.pages``,
    ``self.toc`` and ``self.book_format`` are populated from it.

    :param processed: If true, also run ``create_oebbook`` on the output of
        the input plugin.
    :param only_input_plugin: If true, never run ``create_oebbook``.
    :param run_char_count: Forwarded to every ``SpineItem``.
    :param read_anchor_map: Forwarded to every ``SpineItem``; when true,
        ``create_indexing_data`` is also run on the spine and the ToC.
    :param view_kepub: Forwarded to ``Plumber``.
    :param read_links: Forwarded to every ``SpineItem``.
    :return: ``self``
    '''
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    self.delete_on_exit = []
    self._tdir = TemporaryDirectory('_ebook_iter')
    self.base = self._tdir.__enter__()
    plumber = Plumber(self.pathtoebook, self.base, self.log, view_kepub=view_kepub)
    plumber.setup_options()
    if self.pathtoebook.lower().endswith('.opf'):
        plumber.opts.dont_package = True
    if hasattr(plumber.opts, 'no_process'):
        plumber.opts.no_process = True
    plumber.input_plugin.for_viewer = True
    with plumber.input_plugin, open(plumber.input, 'rb') as inf:
        self.pathtoopf = plumber.input_plugin(inf,
            plumber.opts, plumber.input_fmt, self.log,
            {}, self.base)

        if not only_input_plugin:
            # Run the HTML preprocess/parsing from the conversion pipeline as
            # well
            if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                    not hasattr(self.pathtoopf, 'manifest')):
                if hasattr(self.pathtoopf, 'manifest'):
                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
                self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts)

        if hasattr(self.pathtoopf, 'manifest'):
            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

    self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
    if getattr(plumber.input_plugin, 'is_kf8', False):
        fs = ':joint' if getattr(plumber.input_plugin, 'mobi_is_joint', False) else ''
        self.book_format = 'KF8' + fs

    # Prefer an OPF object supplied by the input plugin, if it has one
    self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
    if self.opf is None:
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
    self.language = self.opf.language
    if self.language:
        self.language = self.language.lower()
    # Linear spine items first, non-linear ones after them
    ordered = [i for i in self.opf.spine if i.is_linear] + \
              [i for i in self.opf.spine if not i.is_linear]
    self.spine = []
    Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links,
            run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
    is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
    for i in ordered:
        spath = i.path
        mt = None
        if i.idref is not None:
            mt = self.opf.manifest.type_for_id(i.idref)
        if mt is None:
            mt = guess_type(spath)[0]
        try:
            self.spine.append(Spiny(spath, mime_type=mt))
            if is_comic:
                self.spine[-1].is_single_page = True
        except Exception:
            # Best effort: skip items that cannot be loaded, but let
            # KeyboardInterrupt/SystemExit propagate
            self.log.warn('Missing spine item:', repr(spath))

    cover = self.opf.cover
    if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                    'azw', 'azw3', 'docx', 'htmlz'}:
        # Generate a title page for the cover and put it first in the spine
        cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
        rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
        chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
        with open(cfile, 'wb') as f:
            f.write(chtml)
        self.spine[0:0] = [Spiny(cfile,
            mime_type='application/xhtml+xml')]
        self.delete_on_exit.append(cfile)

    if self.opf.path_to_html_toc is not None and \
       self.opf.path_to_html_toc not in self.spine:
        try:
            self.spine.append(Spiny(self.opf.path_to_html_toc))
        except Exception:
            import traceback
            traceback.print_exc()

    sizes = [i.character_count for i in self.spine]
    self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
    for p, s in zip(self.pages, self.spine):
        s.pages = p
    # Assign consecutive page ranges to the spine items, starting at page 1
    start = 1
    for s in self.spine:
        s.start_page = start
        start += s.pages
        s.max_page = s.start_page + s.pages - 1
    self.toc = self.opf.toc
    if read_anchor_map:
        create_indexing_data(self.spine, self.toc)

    self.verify_links()

    self.read_bookmarks()

    return self