def test_backup(self):  # {{{
    'Changed metadata must be written to the OPF backups by the backup thread'
    library = self.cloned_library
    cache = self.init_cache(library)
    set_field = cache.set_field
    # Start from a clean state: flush anything that is already marked dirty
    cache.dump_metadata()
    self.assertFalse(cache.dirtied_cache)

    from calibre.db.backup import MetadataBackup
    backup_thread = MetadataBackup(cache, interval=0.01, scheduling_interval=0)
    backup_thread.start()
    try:
        changed = set_field('title', {1: 'title1', 2: 'title2', 3: 'title3'})
        self.assertEqual(changed, {1, 2, 3})
        changed = set_field('authors', {
            1: 'author1 & author2',
            2: 'author1 & author2',
            3: 'author1 & author2',
        })
        self.assertEqual(changed, {1, 2, 3})
        # Wait (at most six times, two seconds each) for the dirty queue
        # to be drained by the backup thread
        tries_left = 6
        while cache.dirty_queue_length() and tries_left > 0:
            backup_thread.join(2)
            tries_left -= 1
        self.assertFalse(cache.dirty_queue_length())
    finally:
        backup_thread.stop()
    backup_thread.join(2)
    self.assertFalse(backup_thread.is_alive())

    # Every book's backup must now carry the values set above
    from calibre.ebooks.metadata.opf2 import OPF
    for book_id in (1, 2, 3):
        backup = OPF(BytesIO(cache.read_backup(book_id)))
        self.assertEqual(backup.title, 'title%d' % book_id)
        self.assertEqual(backup.authors, ['author1', 'author2'])
def do_one_apply(self):
    """Apply the next pending metadata update, then schedule the one after it.

    Each entry of ``self.apply_id_map`` is ``(book_id, mi)`` where ``mi`` is
    either a metadata object or an ``(opf_path, cover_path)`` tuple. Once all
    entries have been consumed the result of ``self.finalize_apply()`` is
    returned; otherwise the method re-schedules itself through ``QTimer``
    and returns ``None``.
    """
    if self.apply_current_idx >= len(self.apply_id_map):
        return self.finalize_apply()

    i, mi = self.apply_id_map[self.apply_current_idx]
    if self.gui.current_db.has_id(i):
        if isinstance(mi, tuple):
            opf, cover = mi
            if opf:
                # Close the OPF file as soon as it has been converted to
                # metadata instead of leaking the handle
                with open(opf, 'rb') as opf_file:
                    mi = OPF(opf_file, basedir=os.path.dirname(opf),
                             populate_spine=False).to_book_metadata()
                self.apply_mi(i, mi)
            if cover:
                # Same for the cover: the handle is only needed for the
                # duration of the set_cover() call
                with open(cover, 'rb') as cover_file:
                    self.gui.current_db.set_cover(
                        i, cover_file, notify=False, commit=False)
                self.applied_ids.add(i)
        else:
            self.apply_mi(i, mi)

    self.apply_current_idx += 1
    if self.apply_pd is not None:
        self.apply_pd.value += 1
    # Process one book per timer tick rather than looping here
    QTimer.singleShot(50, self.do_one_apply)
def test_dirtied(self):  # {{{
    'Setting a field must flag the book as dirtied and update last_modified'
    library = self.cloned_library
    cache = self.init_cache(library)
    # Flush any existing dirtied state first
    cache.dump_metadata()
    self.assertFalse(cache.dirtied_cache)
    self.assertFalse(self.init_cache(library).dirtied_cache)

    previous = cache.field_for('last_modified', 3)
    import calibre.db.cache as cache_module
    from datetime import timedelta
    pinned_now = previous + timedelta(days=1)
    real_nowf = cache_module.nowf
    # Replace the clock used by the cache module so that the new
    # last_modified value is predictable
    cache_module.nowf = lambda: pinned_now
    try:
        self.assertEqual(cache.set_field('title', {3: 'xxx'}), {3})
        self.assertTrue(3 in cache.dirtied_cache)
        self.assertEqual(cache.field_for('last_modified', 3), pinned_now)
        cache.dump_metadata()
        raw = cache.read_backup(3)
        from calibre.ebooks.metadata.opf2 import OPF
        backup = OPF(BytesIO(raw))
        self.assertEqual(backup.title, 'xxx')
    finally:
        # Always restore the real clock
        cache_module.nowf = real_nowf
def run(self):
    """Identify the book and store the ranked results on ``self.results``.

    Unless ``DEBUG_DIALOG`` is set, the ``single_identify`` worker is run
    through ``fork_job`` and its serialized results are parsed back into
    metadata objects. Any failure is recorded as text in ``self.error``
    instead of being raised.
    """
    try:
        if DEBUG_DIALOG:
            self.results = self.sample_results()
        else:
            job = fork_job(
                'calibre.ebooks.metadata.sources.worker',
                'single_identify',
                (self.title, self.authors, self.identifiers),
                no_output=True, abort=self.abort)
            self.results, covers, caches, log_dump = job['result']
            parsed = []
            for raw in self.results:
                package = OPF(BytesIO(raw), basedir=getcwd(),
                              populate_spine=False)
                parsed.append(package.to_book_metadata())
            self.results = parsed
            for mi, cached_cover in zip(self.results, covers):
                mi.has_cached_cover_url = cached_cover
            self.caches.update(caches)
            self.log.load(log_dump)
        # Remember the order in which the results were returned
        for rank, mi in enumerate(self.results):
            mi.gui_rank = rank
    except WorkerError as err:
        self.error = force_unicode(err.orig_tb)
    except:
        import traceback
        self.error = force_unicode(traceback.format_exc())
def opf(self):
    """Return the parsed OPF package, parsing and caching it on first use.

    Raises ``EPubException`` when the package file cannot be found
    (``self.open`` raising ``KeyError``).
    """
    if self._opf_cached is not None:
        return self._opf_cached
    try:
        with closing(self.open(self.opf_path)) as package_file:
            self._opf_cached = OPF(package_file, self.root,
                                   populate_spine=False)
    except KeyError:
        raise EPubException("missing OPF package file")
    return self._opf_cached
def get_metadata(stream):
    """Read metadata from an OPF document given as bytes or as a stream.

    Returns a tuple ``(metadata, version, raster_cover, first_spine_item)``.
    """
    source = DummyFile(stream) if isinstance(stream, bytes) else stream
    root = parse_opf(source)
    ver = parse_opf_version(root.get('version'))
    package = OPF(None, preparsed_opf=root, read_toc=False)
    mi = package.to_book_metadata()
    return mi, ver, package.raster_cover, package.first_spine_item()
def process_result(self, group_id, result):
    """Handle the metadata-reading result for one group of files.

    The metadata is normalised (fallback title, dummy application id, tag
    mapping rules) and the book is then queued, merged into an identical
    book, recorded as a duplicate, or added, depending on the adder's
    configuration.
    """
    if result.err:
        mi = self.report_metadata_failure(group_id, result.traceback)
        paths = self.file_groups[group_id]
        has_cover = False
        duplicate_info = set() if self.add_formats_to_existing else False
    else:
        paths, opf, has_cover, duplicate_info = result.value
        try:
            package = OPF(BytesIO(opf), basedir=self.tdir,
                          populate_spine=False, try_to_guess_cover=False)
            mi = package.to_book_metadata()
            mi.read_metadata_failed = False
        except Exception:
            mi = self.report_metadata_failure(group_id,
                                              traceback.format_exc())

    if mi.is_null('title'):
        # Fall back to the name of the first file, without its extension
        for path in paths:
            mi.title = os.path.splitext(os.path.basename(path))[0]
            break
    if mi.application_id == '__calibre_dummy__':
        mi.application_id = None
    if gprefs.get('tag_map_on_add_rules'):
        from calibre.ebooks.metadata.tag_mapper import map_tags
        mi.tags = map_tags(mi.tags, gprefs['tag_map_on_add_rules'])

    self.pd.msg = mi.title

    cover_path = None
    if has_cover:
        cover_path = os.path.join(self.tdir, '%s.cdata' % group_id)

    if self.db is None:
        # No database: only collect the items
        if paths:
            self.items.append((mi, cover_path, paths))
        return

    if self.add_formats_to_existing:
        identical_book_ids = find_identical_books(
            mi, self.find_identical_books_data)
        if not identical_book_ids:
            self.add_book(mi, cover_path, paths)
            return
        try:
            self.merge_books(mi, cover_path, paths, identical_book_ids)
        except Exception:
            report = self.report.append
            report('')
            report('-' * 70)
            report(_('Failed to merge the book: ') + mi.title)
            for f in paths:
                report('\t' + f)
            report(_('With error:'))
            report(traceback.format_exc())
    elif duplicate_info or icu_lower(
            mi.title or _('Unknown')) in self.added_duplicate_info:
        self.duplicates.append((mi, cover_path, paths))
    else:
        self.add_book(mi, cover_path, paths)
def update_metadata(ebook, new_opf):
    """Rewrite the book's OPF in place using the metadata found in ``new_opf``.

    Cover information from ``new_opf`` is discarded before the metadata is
    applied.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    # Imported under an alias: the helper has the same name as this function
    from calibre.ebooks.metadata.epub import update_metadata as apply_to_opf

    opf_dir = os.path.dirname(ebook.name_to_abspath(ebook.opf_name))
    with ebook.open(ebook.opf_name, 'r+b') as stream:
        with open(new_opf, 'rb') as ns:
            opf = OPF(stream, basedir=opf_dir, populate_spine=False,
                      unquote_urls=False)
            mi = OPF(ns, unquote_urls=False,
                     populate_spine=False).to_book_metadata()
            mi.cover, mi.cover_data = None, (None, None)
            apply_to_opf(opf, mi, apply_null=True, update_timestamp=True)
            # Replace the old contents of the OPF with the updated document
            stream.seek(0)
            stream.truncate()
            stream.write(opf.render())
def main(do_identify, covers, metadata, ensure_fields, tdir):
    """Download metadata and/or covers for every book in ``metadata``.

    ``metadata`` maps book ids to serialized OPF. For each book the results
    are written into ``tdir`` as ``<id>.mi``, ``<id>.cover`` and ``<id>.log``.

    Returns ``(failed_ids, failed_covers, all_failed)``.
    """
    failed_ids, failed_covers = set(), set()
    all_failed = True
    log = GUILog()
    patch_plugins()

    def in_tdir(template, book_id):
        return os.path.join(tdir, template % book_id)

    for book_id, raw_opf in iteritems(metadata):
        mi = OPF(BytesIO(raw_opf), basedir=tdir,
                 populate_spine=False).to_book_metadata()
        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
        cdata = None
        log.clear()

        if do_identify:
            results = []
            try:
                results = identify(log, Event(), title=title,
                                   authors=authors, identifiers=identifiers)
            except:
                pass
            if not results:
                log.error('Failed to download metadata for', title)
                failed_ids.add(book_id)
            else:
                all_failed = False
                mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
                identifiers = mi.identifiers
                if not mi.is_null('rating'):
                    # set_metadata expects a rating out of 10
                    mi.rating *= 2
                with open(in_tdir('%d.mi', book_id), 'wb') as f:
                    f.write(metadata_to_opf(mi, default_lang='und'))

        if covers:
            cdata = download_cover(log, title=title, authors=authors,
                                   identifiers=identifiers)
            if cdata is None:
                failed_covers.add(book_id)
            else:
                with open(in_tdir('%d.cover', book_id), 'wb') as f:
                    f.write(cdata[-1])
                all_failed = False

        with open(in_tdir('%d.log', book_id), 'wb') as f:
            f.write(log.plain_text.encode('utf-8'))

    return failed_ids, failed_covers, all_failed
def do_set_metadata(opts, mi, stream, stream_type):
    """Apply the metadata given on the command line to ``stream``.

    A copy of ``mi`` is updated from, in order: the OPF file named by
    ``opts.from_opf``, the generic option preferences, and the options that
    need special conversion (authors, sort strings, tags, series, dates,
    identifiers, cover). The result is written with ``set_metadata``.
    """
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # Close the OPF file once it has been converted to metadata
        with open(from_opf, 'rb') as opf_file:
            opf_mi = OPF(opf_file).to_book_metadata()
        mi.smart_update(opf_mi)

    # These options are handled individually below (or not at all)
    special = ('to_opf', 'from_opf', 'authors', 'title_sort', 'author_sort',
               'get_cover', 'cover', 'tags', 'lrf_bookid', 'identifiers')
    for pref in config().option_set.preferences:
        if pref.name in special:
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)

    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'identifiers', None):
        # Each entry has the form "type:value"
        val = {
            k.strip(): v.strip()
            for k, v in (x.partition(':')[0::2] for x in opts.identifiers)
        }
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            # items() instead of the Python 2 only iteritems(); entries with
            # an empty type or value are dropped
            val = {k: v for k, v in orig.items() if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
def read_serialized_metadata(data):
    """Load metadata (and cover bytes, if any) described by ``data``.

    ``data`` is indexed with ``'opf'`` (required), ``'last_modified'`` and
    ``'cover'``. Returns ``(mi, cdata)`` where ``cdata`` is ``None`` when
    ``data`` has no ``'cover'`` entry.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.utils.date import parse_date

    package = OPF(data['opf'], try_to_guess_cover=False,
                  populate_spine=False,
                  basedir=os.path.dirname(data['opf']))
    mi = package.to_book_metadata()
    try:
        mi.last_modified = parse_date(data['last_modified'])
    except:
        pass
    # The cover is returned separately as raw data
    mi.cover, mi.cover_data = None, (None, None)
    if 'cover' not in data:
        return mi, None
    with lopen(data['cover'], 'rb') as f:
        cdata = f.read()
    return mi, cdata
def do_paste(self, ignore_excluded_fields=False):
    """Paste the metadata on the clipboard onto all selected books.

    Fields listed in the ``exclude_fields_on_paste`` tweak are not pasted
    unless ``ignore_excluded_fields`` is true.
    """
    rows = self.gui.library_view.selectionModel().selectedRows()
    if not rows or len(rows) == 0:
        return error_dialog(self.gui, _('Cannot paste metadata'),
                            _('No books selected'), show=True)
    md = QApplication.clipboard().mimeData()
    if not md.hasFormat('application/calibre-book-metadata'):
        return error_dialog(self.gui, _('Cannot paste metadata'),
                            _('No copied metadata available'), show=True)
    if len(rows) > 1 and not confirm(
            _('You are pasting metadata onto <b>multiple books</b> ({num_of_books}). Are you'
              ' sure you want to do that?').format(num_of_books=len(rows)),
            'paste-onto-multiple', parent=self.gui):
        return

    data = bytes(md.data('application/calibre-book-metadata'))
    mi = OPF(BytesIO(data), populate_spine=False, read_toc=False,
             try_to_guess_cover=False).to_book_metadata()
    mi.application_id = mi.uuid_id = None

    exclude = set() if ignore_excluded_fields else set(
        tweaks['exclude_fields_on_paste'])
    paste_cover = 'cover' not in exclude
    cover = md.imageData() if paste_cover else None
    # 'cover' is not a metadata field that can be nulled
    exclude.discard('cover')
    for field in exclude:
        mi.set_null(field)

    db = self.gui.current_db
    book_ids = {db.id(r.row()) for r in rows}
    keep_title = 'title' in exclude
    keep_authors = 'authors' in exclude
    for book_id in book_ids:
        # Excluded title/authors are taken from the book being pasted onto
        if keep_title:
            mi.title = db.new_api.field_for('title', book_id)
        if keep_authors:
            mi.authors = db.new_api.field_for('authors', book_id)
        db.new_api.set_metadata(book_id, mi, ignore_errors=True)
    if cover:
        db.new_api.set_cover({book_id: cover for book_id in book_ids})
    self.refresh_books_after_metadata_edit(book_ids)
def set_metadata_opf2(root, cover_prefix, mi, opf_version, cover_data=None,
                      apply_null=False, update_timestamp=False,
                      force_identifiers=False, add_missing_cover=True):
    """Update the parsed OPF ``root`` with the metadata in ``mi``.

    When the OPF has no raster cover, ``cover_data`` is given and
    ``add_missing_cover`` is true, a cover entry is taken from the guide or
    created in the manifest and marked as the cover in the way appropriate
    for ``opf_version``.

    Returns ``(rendered_opf, raster_cover)``.
    """
    mi = MetaInformation(mi)
    for attr in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, attr, None)
    opf = OPF(None, preparsed_opf=root, read_toc=False)
    if mi.languages:
        mi.languages = normalize_languages(
            list(opf.raw_languages) or [], mi.languages)

    opf.smart_update(mi, apply_null=apply_null)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        # Merge with the existing identifiers, dropping empty entries
        merged = opf.get_identifiers()
        merged.update(mi.get_identifiers())
        opf.set_identifiers({k: v for k, v in merged.items() if k and v})
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp

    raster_cover = opf.raster_cover
    if raster_cover is None and cover_data is not None and add_missing_cover:
        item = opf.guide_raster_cover
        if item is not None:
            raster_cover = item.get('href')
        else:
            if cover_prefix and not cover_prefix.endswith('/'):
                cover_prefix += '/'
            name = cover_prefix + 'cover.jpg'
            item = create_manifest_item(opf.root, name, 'cover')
            if item is not None:
                raster_cover = name
        if item is not None:
            if opf_version.major < 3:
                # Replace any existing <meta name="cover"> elements
                for old in opf.root.xpath('//*[local-name()="meta" and @name="cover"]'):
                    old.getparent().remove(old)
                meta = opf.create_metadata_element('meta', is_dc=False)
                meta.set('name', 'cover')
                meta.set('content', item.get('id'))
            else:
                # Only one manifest item may carry the cover-image property
                for other in opf.root.xpath('//*[local-name()="item" and contains(@properties, "cover-image")]'):
                    other.set('properties', other.get('properties').replace('cover-image', '').strip())
                item.set('properties', 'cover-image')

    with pretty_print:
        return opf.render(), raster_cover
def convert_text(self, oeb_book):
    """Serialize ``oeb_book`` to a temporary directory and write it as PDF."""
    from calibre.ebooks.pdf.writer import PDFWriter
    from calibre.ebooks.metadata.opf2 import OPF

    self.log.debug('Serializing oeb input to disk for processing...')
    self.get_cover_data()

    with TemporaryDirectory('_pdf_out') as oeb_dir:
        from calibre.customize.ui import plugin_for_output_format
        serializer = plugin_for_output_format('oeb')
        serializer.convert(oeb_book, oeb_dir, self.input_plugin, self.opts,
                           self.log)

        # Use the first OPF file found in the output directory
        opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
        opf = OPF(opfpath, os.path.dirname(opfpath))
        spine_paths = [s.path for s in opf.spine]
        self.write(PDFWriter, spine_paths)
def set_metadata(stream, mi):
    """Write the metadata in ``mi`` into the OPF of the zip archive ``stream``.

    If ``mi`` carries cover data (``mi.cover_data[1]``) or names a readable
    cover file (``mi.cover``), the cover is replaced or added as well. The
    temporary cover file is removed afterwards, also when updating the
    archive fails.
    """
    replacements = {}
    # Stays None when no new cover is supplied. Previously the name was
    # unbound in that case and the cleanup only "worked" because the
    # resulting NameError was swallowed by a bare except.
    cpath = None

    # Get the OPF in the archive.
    with ZipFile(stream) as zf:
        opf_path = get_first_opf_name(zf)
        opf_stream = io.BytesIO(zf.read(opf_path))
    opf = OPF(opf_stream)

    # Cover: prefer in-memory data, fall back to the file named by mi.cover.
    new_cdata = None
    try:
        new_cdata = mi.cover_data[1]
    except Exception:
        pass
    if not new_cdata:
        try:
            with open(mi.cover, 'rb') as f:
                new_cdata = f.read()
        except Exception:
            # Best effort: no usable cover simply means the cover is left alone
            pass

    try:
        if new_cdata:
            cpath = opf.raster_cover
            if not cpath:
                cpath = 'cover.jpg'
            new_cover = _write_new_cover(new_cdata, cpath)
            replacements[cpath] = open(new_cover.name, 'rb')
            mi.cover = cpath

        # Update the metadata.
        opf.smart_update(mi, replace_metadata=True)
        newopf = io.BytesIO(opf.render())
        safe_replace(stream, opf_path, newopf,
                     extra_replacements=replacements, add_missing=True)
    finally:
        # Cleanup temporary files.
        cover_file = replacements.get(cpath)
        if cover_file is not None:
            try:
                cover_file.close()
                os.remove(cover_file.name)
            except (IOError, OSError):
                pass
def get_metadata(book_id):
    """Return ``(oldmi, newmi)``: the current and the downloaded metadata.

    The downloaded OPF and cover paths are looked up in ``id_map``.
    """
    oldmi = db.get_metadata(book_id, index_is_id=True, get_cover=True,
                            cover_as_data=True)
    opf_path, cover_path = id_map[book_id]
    if opf_path is not None:
        with open(opf_path, 'rb') as f:
            newmi = OPF(f, basedir=os.path.dirname(opf_path),
                        populate_spine=False).to_book_metadata()
        newmi.cover, newmi.cover_data = None, (None, None)
    else:
        newmi = Metadata(oldmi.title, authors=tuple(oldmi.authors))

    # Title and author are set to null if they are the same as the
    # originals as an optimization, we undo that, as it is confusing.
    for field in ('title', 'authors'):
        if newmi.is_null(field):
            newmi.set(field, copy.copy(oldmi.get(field)))

    if cover_path:
        with open(cover_path, 'rb') as f:
            newmi.cover_data = ('jpg', f.read())
    return oldmi, newmi
def get_metadata(stream, extract_cover=True):
    '''
    Return the metadata found in the first OPF file of the zip archive
    ``stream``. If anything goes wrong, whatever metadata was obtained up
    to that point is returned (initially a record with 'Unknown' title
    and author).
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    try:
        with ZipFile(stream) as zf:
            opf = OPF(StringIO(zf.read(get_first_opf_name(zf))))
            mi = opf.to_book_metadata()
            cover_href = opf.raster_cover if extract_cover else None
            if cover_href:
                cover_ext = os.path.splitext(cover_href)[1]
                mi.cover_data = (cover_ext, zf.read(cover_href))
    except:
        pass
    return mi
def opf_metadata(opfpath):
    """Read metadata from an OPF file given as a path or a file-like object.

    Returns the metadata only when the OPF has an application id; the cover
    referenced by the OPF is loaded into ``cover_data`` when it is readable.
    Returns ``None`` when there is no application id or when reading fails
    (the traceback is printed in that case).
    """
    if hasattr(opfpath, 'read'):
        f = opfpath
        opfpath = getattr(f, 'name', getcwd())
        opened_here = False
    else:
        f = open(opfpath, 'rb')
        opened_here = True
    try:
        opf = OPF(f, os.path.dirname(opfpath))
        if opf.application_id is not None:
            mi = opf.to_book_metadata()
            if hasattr(opf, 'cover') and opf.cover:
                cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
                if os.access(cpath, os.R_OK):
                    fmt = cpath.rpartition('.')[-1]
                    with open(cpath, 'rb') as cover_file:
                        data = cover_file.read()
                    mi.cover_data = (fmt, data)
            return mi
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        # Only close the file if this function opened it; a stream passed
        # in by the caller remains the caller's responsibility
        if opened_here:
            f.close()
def zip_opf_metadata(opfpath, zf):
    """Read metadata from an OPF (path or file-like object) that belongs to
    the zip archive ``zf``, loading the cover from the archive if possible.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    if hasattr(opfpath, 'read'):
        f = opfpath
        opfpath = getattr(f, 'name', getcwd())
        opened_here = False
    else:
        f = open(opfpath, 'rb')
        opened_here = True
    try:
        opf = OPF(f, os.path.dirname(opfpath))
        mi = opf.to_book_metadata()
    finally:
        # Only close the file if this function opened it; a stream passed
        # in by the caller remains the caller's responsibility
        if opened_here:
            f.close()
    # This is broken, in that it only works for
    # when both the OPF file and the cover file are in the root of the
    # zip file and the cover is an actual raster image, but I don't care
    # enough to make it more robust
    if getattr(mi, 'cover', None):
        covername = os.path.basename(mi.cover)
        mi.cover = None
        names = zf.namelist()
        if covername in names:
            fmt = covername.rpartition('.')[-1]
            data = zf.read(covername)
            mi.cover_data = (fmt, data)
    return mi
def convert_text(self, oeb_book):
    """Serialize ``oeb_book`` to a temporary directory and write it as PDF,
    using the writer selected by ``self.opts.old_pdf_engine``.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    if self.opts.old_pdf_engine:
        from calibre.ebooks.pdf.writer import PDFWriter
        PDFWriter
    else:
        from calibre.ebooks.pdf.render.from_html import PDFWriter

    self.log.debug('Serializing oeb input to disk for processing...')
    self.get_cover_data()
    self.handle_embedded_fonts()

    with TemporaryDirectory('_pdf_out') as oeb_dir:
        from calibre.customize.ui import plugin_for_output_format
        serializer = plugin_for_output_format('oeb')
        serializer.convert(oeb_book, oeb_dir, self.input_plugin, self.opts,
                           self.log)

        # Use the first OPF file found in the output directory
        opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
        opf = OPF(opfpath, os.path.dirname(opfpath))
        spine_paths = [s.path for s in opf.spine]
        self.write(PDFWriter, spine_paths, getattr(opf, 'toc', None))
def handle_zip_of_opf_files(self, stream):
    '''
    Given a zip up of a bunch of opf files, either merge them or add them
    to library.

    Image entries (jpg/png/gif) whose base name is a key of
    ``self.book_map`` are set as the cover of the mapped book.

    Returns a dict with the number of books ``'updated'`` and ``'added'``.
    '''
    result = {'updated': 0, 'added': 0}
    with ZipFile(stream, 'r') as zf:
        self.start_applying_updates()
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if ext == 'opf':
                try:
                    raw = zf.open(zi)
                    opf = OPF(raw)
                    mi = opf.to_book_metadata()
                    casanova_id = self.extract_id(mi)
                    if casanova_id:
                        book_mi = self.get_casanova_metadata(
                            casanova_id['id'])
                        if book_mi:
                            # Update an existing book's metadata!
                            result['updated'] += 1
                            self.apply_metadata_update(
                                casanova_id['id'], book_mi, mi)
                        else:
                            # Create a new book entry
                            result['added'] += 1
                            self.model.db.import_book(mi, [])
                except Exception:
                    # Deliberately best effort: one unreadable OPF must not
                    # abort the processing of the remaining entries
                    pass
            if ext in {'jpg', 'png', 'gif'}:
                # try and handle the cover
                casanova_id = zi.filename.partition('.')[0].lower()
                if casanova_id in self.book_map:
                    book_id = self.book_map[casanova_id]
                    raw = zf.open(zi)
                    self.db.set_cover(book_id, raw)
        self.finish_applying_updates()
    return result
def paste_metadata(self):
    """Paste the metadata (and cover image, if any) on the clipboard onto
    all selected books.
    """
    rows = self.gui.library_view.selectionModel().selectedRows()
    if not rows or len(rows) == 0:
        return error_dialog(self.gui, _('Cannot paste metadata'),
                            _('No books selected'), show=True)
    md = QApplication.clipboard().mimeData()
    if not md.hasFormat('application/calibre-book-metadata'):
        return error_dialog(self.gui, _('Cannot paste metadata'),
                            _('No copied metadata available'), show=True)
    if len(rows) > 1 and not confirm(
            _('You are pasting metadata onto <b>multiple books</b> ({num_of_books}). Are you'
              ' sure you want to do that?').format(num_of_books=len(rows)),
            'paste-onto-multiple', parent=self.gui):
        return

    data = bytes(md.data('application/calibre-book-metadata'))
    mi = OPF(BytesIO(data), populate_spine=False, read_toc=False,
             try_to_guess_cover=False).to_book_metadata()
    # The pasted metadata must not carry over the identity of its source
    mi.application_id = mi.uuid_id = None
    cover = md.imageData()

    db = self.gui.current_db
    book_ids = {db.id(r.row()) for r in rows}
    for book_id in book_ids:
        db.new_api.set_metadata(book_id, mi, ignore_errors=True)
    if cover:
        db.new_api.set_cover({book_id: cover for book_id in book_ids})
    self.refresh_books_after_metadata_edit(book_ids)
def __init__(self):
    """Read the container, OPF package and encryption data of the EPUB.

    A missing or unexpected mimetype declaration only produces a warning.
    Raises ``EPubException`` when container.xml or the OPF package file is
    missing. Missing or unreadable encryption data results in an empty
    ``Encryption`` object.
    """
    try:
        # closing(): do not leak the mimetype stream (the other streams
        # opened in this method are handled the same way)
        with closing(self.open('mimetype')) as f:
            mimetype = f.read().rstrip()
        if mimetype != OCF.MIMETYPE:
            # Single-argument print(...) works both as the Python 2 print
            # statement and as the Python 3 print function
            print('WARNING: Invalid mimetype declaration %s' % (mimetype,))
    except Exception:
        print('WARNING: Epub doesn\'t contain a mimetype declaration')

    try:
        with closing(self.open(OCF.CONTAINER_PATH)) as f:
            self.container = Container(f)
    except KeyError:
        raise EPubException("missing OCF container.xml file")
    self.opf_path = self.container[OPF.MIMETYPE]
    try:
        with closing(self.open(self.opf_path)) as f:
            self.opf = OPF(f, self.root, populate_spine=False)
    except KeyError:
        raise EPubException("missing OPF package file")
    try:
        with closing(self.open(self.ENCRYPTION_PATH)) as f:
            self.encryption_meta = Encryption(f.read())
    except Exception:
        # No (readable) encryption data
        self.encryption_meta = Encryption(None)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    """Write ``oeb_book`` to ``output_path`` as an HTMLZ archive.

    The book is rendered to a single HTML file (plus optional external
    stylesheet, images, cover and ``metadata.opf``) in a temporary
    directory, which is then zipped up. The CSS handling is selected by
    ``opts.htmlz_css_type``.
    """
    from lxml import etree
    from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
    from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
    from calibre.utils.zipfile import ZipFile
    from calibre.utils.filenames import ascii_filename

    # HTML
    if opts.htmlz_css_type == 'inline':
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
        OEB2HTMLizer = OEB2HTMLInlineCSSizer
    elif opts.htmlz_css_type == 'tag':
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
        OEB2HTMLizer = OEB2HTMLNoCSSizer
    else:
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

    with TemporaryDirectory(u'_htmlz_output') as tdir:
        htmlizer = OEB2HTMLizer(log)
        html = htmlizer.oeb2html(oeb_book, opts)

        fname = u'index'
        if opts.htmlz_title_filename:
            from calibre.utils.filenames import shorten_components_to
            fname = shorten_components_to(100, (ascii_filename(
                unicode_type(oeb_book.metadata.title[0])), ))[0]
        with open(os.path.join(tdir, fname + u'.html'), 'wb') as tf:
            if isinstance(html, unicode_type):
                html = html.encode('utf-8')
            tf.write(html)

        # CSS
        if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
            with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                tf.write(htmlizer.get_css(oeb_book))

        # Images
        images = htmlizer.images
        if images:
            if not os.path.exists(os.path.join(tdir, u'images')):
                os.makedirs(os.path.join(tdir, u'images'))
            for item in oeb_book.manifest:
                if item.media_type in OEB_IMAGES and item.href in images:
                    if item.media_type == SVG_MIME:
                        data = unicode_type(
                            etree.tostring(item.data, encoding=unicode_type))
                    else:
                        data = item.data
                    # The file is opened in binary mode, so text (the
                    # serialized SVG) has to be encoded first, exactly as
                    # is done for the HTML above
                    if isinstance(data, unicode_type):
                        data = data.encode('utf-8')
                    fname = os.path.join(tdir, u'images', images[item.href])
                    with open(fname, 'wb') as img:
                        img.write(data)

        # Cover
        cover_path = None
        try:
            cover_data = None
            if oeb_book.metadata.cover:
                term = oeb_book.metadata.cover[0].term
                cover_data = oeb_book.guide[term].item.data
            if cover_data:
                from calibre.utils.img import save_cover_data_to
                cover_path = os.path.join(tdir, u'cover.jpg')
                with lopen(cover_path, 'w') as cf:
                    cf.write('')
                save_cover_data_to(cover_data, cover_path)
        except:
            # A broken cover must not abort the conversion
            import traceback
            traceback.print_exc()

        # Metadata
        with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
            opf = OPF(
                io.BytesIO(
                    etree.tostring(oeb_book.metadata.to_opf1(),
                                   encoding='UTF-8')))
            mi = opf.to_book_metadata()
            if cover_path:
                mi.cover = u'cover.jpg'
            mdataf.write(metadata_to_opf(mi))

        # Close the archive explicitly so that it is completely written
        # before the temporary directory is removed
        with ZipFile(output_path, 'w') as htmlz:
            htmlz.add_dir(tdir)
def __enter__(self, processed=False, only_input_plugin=False,
              run_char_count=True, read_anchor_map=True,
              extract_embedded_fonts_for_qt=False):
    ''' Convert an ebook file into an exploded OEB book suitable for
    display in viewers/preprocessing etc.

    The book at ``self.pathtoebook`` is run through its input plugin into
    a temporary directory (``self.base``). From the resulting OPF the
    spine, page counts, table of contents, language and book format are
    set as attributes on ``self``.

    :param processed: Also run ``create_oebbook`` from the conversion
        pipeline on the output of the input plugin.
    :param only_input_plugin: Skip the ``create_oebbook`` step entirely.
    :param run_char_count: Passed on to every ``SpineItem``.
    :param read_anchor_map: Passed on to every ``SpineItem``; also controls
        whether ``create_indexing_data`` is called.
    :param extract_embedded_fonts_for_qt: Call ``extract_fonts`` on the OPF;
        failures are only logged.
    :return: ``self``
    '''
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    self.delete_on_exit = []
    # The temporary directory is entered manually here; this method does
    # not leave it
    self._tdir = TemporaryDirectory('_ebook_iter')
    self.base = self._tdir.__enter__()
    plumber = Plumber(self.pathtoebook, self.base, self.log)
    plumber.setup_options()
    if self.pathtoebook.lower().endswith('.opf'):
        plumber.opts.dont_package = True
    if hasattr(plumber.opts, 'no_process'):
        plumber.opts.no_process = True
    plumber.input_plugin.for_viewer = True
    with plumber.input_plugin, open(plumber.input, 'rb') as inf:
        # The plugin's result is stored in self.pathtoopf; the hasattr
        # checks below deal with results that have a `manifest` attribute
        # and must first be written to disk with write_oebbook()
        self.pathtoopf = plumber.input_plugin(inf, plumber.opts,
                                              plumber.input_fmt, self.log,
                                              {}, self.base)
        if not only_input_plugin:
            # Run the HTML preprocess/parsing from the conversion pipeline as
            # well
            # NOTE: `and` binds tighter than `or`, so this reads
            # processed or (format in {...} and not hasattr(...))
            if (processed or
                    plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                    not hasattr(self.pathtoopf, 'manifest')):
                if hasattr(self.pathtoopf, 'manifest'):
                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
                self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                                                plumber.opts)
        if hasattr(self.pathtoopf, 'manifest'):
            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
    self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
    if getattr(plumber.input_plugin, 'is_kf8', False):
        self.book_format = 'KF8'
    # Reuse the OPF object exposed by the input plugin, if there is one,
    # instead of parsing the file again
    self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
    if self.opf is None:
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
    self.language = self.opf.language
    if self.language:
        self.language = self.language.lower()
    # Linear spine items first, followed by the non-linear ones
    ordered = [i for i in self.opf.spine if i.is_linear] + \
        [i for i in self.opf.spine if not i.is_linear]
    self.spine = []
    Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
                    run_char_count=run_char_count)
    is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
    for i in ordered:
        spath = i.path
        mt = None
        # Prefer the media type declared in the manifest, fall back to
        # guessing it from the file name
        if i.idref is not None:
            mt = self.opf.manifest.type_for_id(i.idref)
        if mt is None:
            mt = guess_type(spath)[0]
        try:
            self.spine.append(Spiny(spath, mime_type=mt))
            if is_comic:
                self.spine[-1].is_single_page = True
        except:
            # Any failure to create the item is reported as a missing item
            self.log.warn('Missing spine item:', repr(spath))
    cover = self.opf.cover
    if cover and self.ebook_ext in {
            'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3'
    }:
        # For these formats, generate an HTML page showing the cover and
        # put it at the front of the spine
        cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
        rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
        chtml = (TITLEPAGE %
                 prepare_string_for_xml(rcpath, True)).encode('utf-8')
        with open(cfile, 'wb') as f:
            f.write(chtml)
        self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
        self.delete_on_exit.append(cfile)
    # Append the HTML table of contents if it is not already in the spine
    if self.opf.path_to_html_toc is not None and \
            self.opf.path_to_html_toc not in self.spine:
        try:
            self.spine.append(Spiny(self.opf.path_to_html_toc))
        except:
            import traceback
            traceback.print_exc()
    # Page counts are derived from character counts
    sizes = [i.character_count for i in self.spine]
    self.pages = [
        math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes
    ]
    for p, s in zip(self.pages, self.spine):
        s.pages = p
    # Page numbers are 1-based and run continuously through the spine
    start = 1
    for s in self.spine:
        s.start_page = start
        start += s.pages
        s.max_page = s.start_page + s.pages - 1
    self.toc = self.opf.toc
    if read_anchor_map:
        create_indexing_data(self.spine, self.toc)
    self.read_bookmarks()
    if extract_embedded_fonts_for_qt:
        from calibre.ebooks.oeb.iterator.extract_fonts import extract_fonts
        try:
            extract_fonts(self.opf, self.log)
        except:
            # Log the failure with the filter level temporarily set to
            # DEBUG, then restore the previous level
            ol = self.log.filter_level
            self.log.filter_level = self.log.DEBUG
            self.log.exception('Failed to extract fonts')
            self.log.filter_level = ol
    return self
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a KePub file into a structure calibre can process.

    The archive is extracted into the current working directory, its OPF
    is located and cleaned up (hrefs made relative to the working
    directory, cover rationalized, unusable spine entries removed) and
    written out as ``content.opf``.

    :return: Absolute path of the ``content.opf`` that was written.
    :raises ValueError: If no OPF is found, the book uses DTBook markup or
        the spine has no valid entries.
    :raises DRMError: If a ``rights.xml`` file exists after extraction.
    """
    log("KEPUBInput::convert - start")
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    try:
        zf = ZipFile(stream)
        cwd = os.getcwdu() if sys.version_info.major == 2 else os.getcwd()
        zf.extractall(cwd)
    except Exception:
        log.exception("KEPUB appears to be invalid ZIP file, trying a "
                      "more forgiving ZIP parser")
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    opf = self.find_opf()
    if opf is None:
        # Fall back to the first .opf file found in the extracted tree,
        # skipping __MACOSX entries and dot-files
        for f in walk("."):
            if (f.lower().endswith(".opf") and "__MACOSX" not in f and
                    not os.path.basename(f).startswith(".")):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, "name", "stream")
    if opf is None:
        raise ValueError(
            _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)").
            format(path))
    encfile = os.path.abspath("rights.xml")
    if os.path.exists(encfile):
        raise DRMError(os.path.basename(path))
    cwd = os.getcwdu() if sys.version_info.major == 2 else os.getcwd()
    # From here on `opf` is first the path relative to cwd and then the
    # parsed OPF object; `parts` keeps the (directory, file name) pair
    opf = os.path.relpath(opf, cwd)
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
    self.encrypted_fonts = []
    if len(parts) > 1 and parts[0]:
        # The OPF is in a sub-directory: prefix all manifest and guide
        # hrefs with that directory
        delta = "/".join(parts[:-1]) + "/"
        for elem in opf.itermanifest():
            elem.set("href", delta + elem.get("href"))
        for elem in opf.iterguide():
            elem.set("href", delta + elem.get("href"))
    f = (self.rationalize_cover3
         if opf.package_version >= 3.0 else self.rationalize_cover2)
    self.removed_cover = f(opf, log)
    # Expose the parsed OPF as an attribute of the plugin
    self.optimize_opf_parsing = opf
    for x in opf.itermanifest():
        if x.get("media-type", "") == "application/x-dtbook+xml":
            raise ValueError(
                _("EPUB files with DTBook markup are not supported"
                  )  # noqa: F821
            )
    # Manifest items with these media types must not appear in the spine
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get("id", None)
        if id_ and y.get("media-type", None) in {
                "application/vnd.adobe-page-template+xml",
                "application/vnd.adobe.page-template+xml",
                "application/adobe-page-template+xml",
                "application/adobe.page-template+xml",
                "application/text",
        }:
            not_for_spine.add(id_)
    # Drop spine entries without an idref, with an excluded idref or with
    # an idref that has already been seen
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get("idref", None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)
    if len(list(opf.iterspine())) == 0:
        raise ValueError(
            _("No valid entries in the spine of this EPUB")  # noqa: F821
        )
    with open("content.opf", "wb") as nopf:
        nopf.write(opf.render())
    return os.path.abspath("content.opf")
def get_metadata2(root, ver):
    """Extract metadata from the already parsed OPF ``root``.

    Returns ``(metadata, ver, raster_cover, first_spine_item)``; ``ver`` is
    passed through unchanged.
    """
    package = OPF(None, preparsed_opf=root, read_toc=False)
    mi = package.to_book_metadata()
    raster_cover = package.raster_cover
    return mi, ver, raster_cover, package.first_spine_item()
def test_annotations(self):  # {{{
    'Test handling of annotations'
    from calibre.utils.date import utcnow, EPOCH
    cl = self.cloned_library
    cache = self.init_cache(cl)
    # First empty dirtied
    cache.dump_metadata()
    self.assertFalse(cache.dirtied_cache)

    def a(**kw):
        # Build an (annotation, seconds since EPOCH) pair. The clock is
        # read only once so that the ISO timestamp stored in the annotation
        # and the returned number of seconds describe the same instant.
        ts = utcnow()
        kw['timestamp'] = ts.isoformat()
        return kw, (ts - EPOCH).total_seconds()

    annot_list = [
        a(type='bookmark', title='bookmark1 changed', seq=1),
        a(type='highlight', highlighted_text='text1', uuid='1', seq=2),
        a(type='highlight', highlighted_text='text2', uuid='2', seq=3,
          notes='notes2 some word changed again'),
    ]

    def map_as_list(amap):
        # Flatten the annotations map into a single list ordered by 'seq'
        ans = []
        for items in amap.values():
            ans.extend(items)
        ans.sort(key=lambda x: x['seq'])
        return ans

    cache.set_annotations_for_book(1, 'moo', annot_list)
    amap = cache.annotations_map_for_book(1, 'moo')
    self.assertEqual(3, len(cache.all_annotations_for_book(1)))
    self.assertEqual([x[0] for x in annot_list], map_as_list(amap))
    # The book only shows up as dirtied after check_dirtied_annotations()
    self.assertFalse(cache.dirtied_cache)
    cache.check_dirtied_annotations()
    self.assertEqual(set(cache.dirtied_cache), {1})
    cache.dump_metadata()
    cache.check_dirtied_annotations()
    self.assertFalse(cache.dirtied_cache)

    # Test searching
    results = cache.search_annotations('"changed"')
    self.assertEqual([1, 3], [x['id'] for x in results])
    results = cache.search_annotations('"changed"', annotation_type='bookmark')
    self.assertEqual([1], [x['id'] for x in results])
    results = cache.search_annotations('"Changed"')  # changed and change stem differently in english and other euro languages
    self.assertEqual([1, 3], [x['id'] for x in results])
    results = cache.search_annotations('"SOMe"')
    self.assertEqual([3], [x['id'] for x in results])
    results = cache.search_annotations('"change"', use_stemming=False)
    self.assertFalse(results)
    results = cache.search_annotations('"bookmark1"', highlight_start='[', highlight_end=']')
    self.assertEqual(results[0]['text'], '[bookmark1] changed')
    results = cache.search_annotations('"word"', highlight_start='[', highlight_end=']', snippet_size=3)
    self.assertEqual(results[0]['text'], '…some [word] changed…')
    self.assertRaises(FTSQueryError, cache.search_annotations, 'AND OR')
    fts_l = [a(type='bookmark', title='路坎坷走来', seq=1),]
    cache.set_annotations_for_book(1, 'moo', fts_l)
    results = cache.search_annotations('路', highlight_start='[', highlight_end=']')
    self.assertEqual(results[0]['text'], '[路]坎坷走来')

    # Changing and deleting annotations
    annot_list[0][0]['title'] = 'changed title'
    cache.set_annotations_for_book(1, 'moo', annot_list)
    amap = cache.annotations_map_for_book(1, 'moo')
    self.assertEqual([x[0] for x in annot_list], map_as_list(amap))
    del annot_list[1]
    cache.set_annotations_for_book(1, 'moo', annot_list)
    amap = cache.annotations_map_for_book(1, 'moo')
    self.assertEqual([x[0] for x in annot_list], map_as_list(amap))

    # Annotations must survive a round trip through the OPF backup
    cache.check_dirtied_annotations()
    cache.dump_metadata()
    from calibre.ebooks.metadata.opf2 import OPF
    raw = cache.read_backup(1)
    opf = OPF(BytesIO(raw))
    cache.restore_annotations(1, list(opf.read_annotations()))
    amap = cache.annotations_map_for_book(1, 'moo')
    self.assertEqual([x[0] for x in annot_list], map_as_list(amap))
def test_against_opf2(self):  # {{{
    '''Check that OPF 3 metadata handling agrees with the OPF 2 reader.

    The same package document is read once with the OPF 2 class and once,
    after bumping the version attribute to 3.0, with read_metadata(). The
    two results must match field for field. The test then checks that
    apply_metadata() round trips, that empty values are ignored unless
    apply_null is set, and what it returns for cover data.
    '''
    # opf2 {{{
    # NOTE: the JSON carried in the content attributes is full of double
    # quotes, so those attributes are delimited with single quotes to keep
    # the document well-formed. '&' and '<' inside attribute values are
    # written as entities, and the JSON escape \" is spelled \\" because
    # this is not a raw string literal.
    raw = '''<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
        <dc:identifier opf:scheme="calibre" id="calibre_id">1698</dc:identifier>
        <dc:identifier opf:scheme="uuid" id="uuid_id">27106d11-0721-44bc-bcdd-2840f31aaec0</dc:identifier>
        <dc:title>DOCX Demo</dc:title>
        <dc:creator opf:file-as="Goyal, Kovid" opf:role="aut">Kovid Goyal</dc:creator>
        <dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (2.57.1) [http://calibre-ebook.com]</dc:contributor>
        <dc:date>2016-02-17T10:53:08+00:00</dc:date>
        <dc:description>Demonstration of DOCX support in calibre</dc:description>
        <dc:publisher>Kovid Goyal</dc:publisher>
        <dc:identifier opf:scheme="K">xxx</dc:identifier>
        <dc:language>eng</dc:language>
        <dc:subject>calibre</dc:subject>
        <dc:subject>conversion</dc:subject>
        <dc:subject>docs</dc:subject>
        <dc:subject>ebook</dc:subject>
        <meta content='{"Kovid Goyal": ""}' name="calibre:author_link_map"/>
        <meta content="Demos" name="calibre:series"/>
        <meta content="1" name="calibre:series_index"/>
        <meta content="10" name="calibre:rating"/>
        <meta content="2015-12-11T16:28:36+00:00" name="calibre:timestamp"/>
        <meta content="DOCX Demo" name="calibre:title_sort"/>
        <meta content='{"crew.crow": [], "crew.moose": [], "crew": []}' name="calibre:user_categories"/>
        <meta name="calibre:user_metadata:#number" content='{"kind": "field", "column": "value", "is_csp": false, "name": "Number", "rec_index": 29, "#extra#": null, "colnum": 12, "is_multiple2": {}, "category_sort": "value", "display": {"number_format": null}, "search_terms": ["#number"], "is_editable": true, "datatype": "int", "link_column": "value", "#value#": 31, "is_custom": true, "label": "number", "table": "custom_column_12", "is_multiple": null, "is_category": false}'/>
        <meta name="calibre:user_metadata:#genre" content='{"kind": "field", "column": "value", "is_csp": false, "name": "Genre", "rec_index": 26, "#extra#": null, "colnum": 9, "is_multiple2": {}, "category_sort": "value", "display": {"use_decorations": 0}, "search_terms": ["#genre"], "is_editable": true, "datatype": "text", "link_column": "value", "#value#": "Demos", "is_custom": true, "label": "genre", "table": "custom_column_9", "is_multiple": null, "is_category": true}'/>
        <meta name="calibre:user_metadata:#commetns" content='{"kind": "field", "column": "value", "is_csp": false, "name": "My Comments", "rec_index": 23, "#extra#": null, "colnum": 13, "is_multiple2": {}, "category_sort": "value", "display": {}, "search_terms": ["#commetns"], "is_editable": true, "datatype": "comments", "link_column": "value", "#value#": "&lt;div&gt;&lt;b&gt;&lt;i&gt;Testing&lt;/i&gt;&lt;/b&gt; extra &lt;font color=\\"#aa0000\\"&gt;comments&lt;/font&gt;&lt;/div&gt;", "is_custom": true, "label": "commetns", "table": "custom_column_13", "is_multiple": null, "is_category": false}'/>
        <meta name="calibre:user_metadata:#formats" content='{"kind": "field", "column": "value", "is_csp": false, "name": "Formats", "rec_index": 25, "#extra#": null, "colnum": 4, "is_multiple2": {}, "category_sort": "value", "display": {"composite_template": "{formats}", "contains_html": false, "use_decorations": 0, "composite_sort": "text", "make_category": false}, "search_terms": ["#formats"], "is_editable": true, "datatype": "composite", "link_column": "value", "#value#": "AZW3, DOCX, EPUB", "is_custom": true, "label": "formats", "table": "custom_column_4", "is_multiple": null, "is_category": false}'/>
        <meta name="calibre:user_metadata:#rating" content='{"kind": "field", "column": "value", "is_csp": false, "name": "My Rating", "rec_index": 30, "#extra#": null, "colnum": 1, "is_multiple2": {}, "category_sort": "value", "display": {}, "search_terms": ["#rating"], "is_editable": true, "datatype": "rating", "link_column": "value", "#value#": 10, "is_custom": true, "label": "rating", "table": "custom_column_1", "is_multiple": null, "is_category": true}'/>
        <meta name="calibre:user_metadata:#series" content='{"kind": "field", "column": "value", "is_csp": false, "name": "My Series2", "rec_index": 31, "#extra#": 1.0, "colnum": 5, "is_multiple2": {}, "category_sort": "value", "display": {}, "search_terms": ["#series"], "is_editable": true, "datatype": "series", "link_column": "value", "#value#": "s", "is_custom": true, "label": "series", "table": "custom_column_5", "is_multiple": null, "is_category": true}'/>
        <meta name="calibre:user_metadata:#tags" content='{"kind": "field", "column": "value", "is_csp": false, "name": "My Tags", "rec_index": 33, "#extra#": null, "colnum": 11, "is_multiple2": {"ui_to_list": ",", "cache_to_list": "|", "list_to_ui": ", "}, "category_sort": "value", "display": {"is_names": false, "description": "A tag like column for me"}, "search_terms": ["#tags"], "is_editable": true, "datatype": "text", "link_column": "value", "#value#": ["t1", "t2"], "is_custom": true, "label": "tags", "table": "custom_column_11", "is_multiple": "|", "is_category": true}'/>
        <meta name="calibre:user_metadata:#yesno" content='{"kind": "field", "column": "value", "is_csp": false, "name": "Yes/No", "rec_index": 34, "#extra#": null, "colnum": 7, "is_multiple2": {}, "category_sort": "value", "display": {}, "search_terms": ["#yesno"], "is_editable": true, "datatype": "bool", "link_column": "value", "#value#": false, "is_custom": true, "label": "yesno", "table": "custom_column_7", "is_multiple": null, "is_category": false}'/>
        <meta name="calibre:user_metadata:#myenum" content='{"kind": "field", "column": "value", "is_csp": false, "name": "My Enum", "rec_index": 28, "#extra#": null, "colnum": 6, "is_multiple2": {}, "category_sort": "value", "display": {"enum_colors": [], "enum_values": ["One", "Two", "Three"], "use_decorations": 0}, "search_terms": ["#myenum"], "is_editable": true, "datatype": "enumeration", "link_column": "value", "#value#": "Two", "is_custom": true, "label": "myenum", "table": "custom_column_6", "is_multiple": null, "is_category": true}'/>
        <meta name="calibre:user_metadata:#isbn" content='{"kind": "field", "column": "value", "is_csp": false, "name": "ISBN", "rec_index": 27, "#extra#": null, "colnum": 3, "is_multiple2": {}, "category_sort": "value", "display": {"composite_template": "{identifiers:select(isbn)}", "contains_html": false, "use_decorations": 0, "composite_sort": "text", "make_category": false}, "search_terms": ["#isbn"], "is_editable": true, "datatype": "composite", "link_column": "value", "#value#": "", "is_custom": true, "label": "isbn", "table": "custom_column_3", "is_multiple": null, "is_category": false}'/>
        <meta name="calibre:user_metadata:#authors" content='{"kind": "field", "column": "value", "is_csp": false, "name": "My Authors", "rec_index": 22, "#extra#": null, "colnum": 10, "is_multiple2": {"ui_to_list": "&amp;", "cache_to_list": "|", "list_to_ui": " &amp; "}, "category_sort": "value", "display": {"is_names": true}, "search_terms": ["#authors"], "is_editable": true, "datatype": "text", "link_column": "value", "#value#": ["calibre, Kovid Goyal"], "is_custom": true, "label": "authors", "table": "custom_column_10", "is_multiple": "|", "is_category": true}'/>
        <meta name="calibre:user_metadata:#date" content='{"kind": "field", "column": "value", "is_csp": false, "name": "My Date", "rec_index": 24, "#extra#": null, "colnum": 2, "is_multiple2": {}, "category_sort": "value", "display": {"date_format": "dd-MM-yyyy", "description": ""}, "search_terms": ["#date"], "is_editable": true, "datatype": "datetime", "link_column": "value", "#value#": {"__value__": "2016-02-17T10:54:15+00:00", "__class__": "datetime.datetime"}, "is_custom": true, "label": "date", "table": "custom_column_2", "is_multiple": null, "is_category": false}'/>
    </metadata><manifest><item href="start.html" media-type="text/html" id="m1"/></manifest><spine><itemref idref="m1"/></spine>
</package>'''  # }}}

    def compare_metadata(mi2, mi3):
        # Compare the custom column metadata first, then every standard
        # field except the two that are explicitly excluded below.
        self.ae(mi2.get_all_user_metadata(False), mi3.get_all_user_metadata(False))
        for field in ALL_METADATA_FIELDS:
            # Tuple membership rather than a substring test against the
            # string 'manifest spine', so only these exact names are skipped.
            if field not in ('manifest', 'spine'):
                v2, v3 = getattr(mi2, field, None), getattr(mi3, field, None)
                self.ae(v2, v3, '%s: %r != %r' % (field, v2, v3))

    mi2 = OPF(BytesIO(raw.encode('utf-8'))).to_book_metadata()
    root = etree.fromstring(raw)
    root.set('version', '3.0')
    mi3, _, raster_cover, first_spine_item = read_metadata(root, return_extra_data=True)
    self.assertIsNone(raster_cover)
    self.ae('start.html', first_spine_item)
    compare_metadata(mi2, mi3)

    # Writing the metadata back and re-reading it must be lossless
    apply_metadata(root, mi3, force_identifiers=True)
    nmi = read_metadata(root)
    compare_metadata(mi3, nmi)

    # Without apply_null, empty values leave the existing values in place
    # (the numeric 0 is still applied)
    mi3.tags = []
    mi3.set('#tags', [])
    mi3.set('#number', 0)
    mi3.set('#commetns', '')
    apply_metadata(root, mi3, update_timestamp=True)
    self.assertFalse(root.xpath('//*/@name'))
    nmi = read_metadata(root)
    self.assertEqual(mi2.tags, nmi.tags)
    self.assertEqual(mi2.get('#tags'), nmi.get('#tags'))
    self.assertEqual(mi2.get('#commetns'), nmi.get('#commetns'))
    self.assertEqual(0, nmi.get('#number'))

    # With apply_null, the empty values clear the fields
    apply_metadata(root, mi3, apply_null=True)
    nmi = read_metadata(root)
    self.assertFalse(nmi.tags)
    self.assertFalse(nmi.get('#tags'))
    self.assertFalse(nmi.get('#commetns'))

    # Return value of apply_metadata() when cover data is supplied
    self.assertIsNone(apply_metadata(root, mi3, cover_data=b'x', cover_prefix='xxx', add_missing_cover=False))
    self.ae('xxx/cover.jpg', apply_metadata(root, mi3, cover_data=b'x', cover_prefix='xxx'))
def add_to_db(self, data):
    """Add the staged files described by ``data`` to the library shown in the GUI.

    :param data: Mapping of file name (joined onto ``self.worker.path``) to a
        temporary directory. The directory is expected to hold ``size.txt``
        (the file size recorded earlier) and ``metadata.opf``.

    For every entry the recorded size is compared with the current size of
    the file. On any mismatch or error the entry is dropped from
    ``self.worker.staging`` and a rescan is scheduled. Otherwise the book is
    added through the library view's model. Books reported as duplicates are
    copied into the temporary directory and offered to the user through
    ``DuplicatesQuestion``. All temporary directories are removed at the end.
    """
    from calibre.ebooks.metadata.opf2 import OPF

    gui = self.parent()
    if gui is None:
        return
    m = gui.library_view.model()
    count = 0

    needs_rescan = False
    duplicates = []
    added_ids = set()

    # items()/values() are used instead of iteritems()/itervalues() so the
    # method does not depend on the Python 2 only dict API.
    for fname, tdir in data.items():
        paths = [os.path.join(self.worker.path, fname)]
        size_path = os.path.join(tdir, 'size.txt')
        try:
            with open(size_path, 'rb') as size_file:
                recorded_size = int(f_read) if False else int(size_file.read())
            if recorded_size != os.stat(paths[0]).st_size:
                raise Exception('Looks like the file was written to after'
                                ' we tried to read metadata')
        except Exception:
            # Anything going wrong here (missing/garbled size.txt, missing
            # source file, size mismatch) means the entry is not usable
            # right now: un-stage it and look at the directory again later.
            needs_rescan = True
            try:
                self.worker.staging.remove(fname)
            except KeyError:
                pass
            continue

        opf_path = os.path.join(tdir, 'metadata.opf')
        if not os.access(opf_path, os.R_OK):
            continue
        # Close the OPF file as soon as the metadata has been extracted
        # instead of leaving the handle to the garbage collector.
        with open(opf_path, 'rb') as opf_file:
            mi = [OPF(opf_file, tdir, populate_spine=False).to_book_metadata()]
        dups, ids = m.add_books(paths,
                [os.path.splitext(fname)[1][1:].upper()], mi,
                add_duplicates=not gprefs['auto_add_check_for_duplicates'],
                return_ids=True)
        added_ids |= set(ids)
        num = len(ids)
        if dups:
            # The source file is deleted below, so keep a copy in tdir for
            # the duplicates question and point the record at that copy.
            path = dups[0][0]
            with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
                    'wb') as dest, open(path, 'rb') as src:
                shutil.copyfileobj(src, dest)
                dups[0][0] = dest.name
            duplicates.append(dups)

        try:
            os.remove(paths[0])
            self.worker.staging.remove(fname)
        except Exception:
            import traceback
            traceback.print_exc()
        count += num

    if duplicates:
        paths, formats, metadata = [], [], []
        for dup_paths, dup_formats, dup_mis in duplicates:
            paths.extend(dup_paths)
            formats.extend(dup_formats)
            metadata.extend(dup_mis)
        dups = [(mic, mic.cover, [p]) for mic, p in zip(metadata, paths)]
        d = DuplicatesQuestion(m.db, dups, parent=gui)
        dups = tuple(d.duplicates)
        if dups:
            paths, formats, metadata = [], [], []
            for mi, cover, book_paths in dups:
                paths.extend(book_paths)
                formats.extend([p.rpartition('.')[-1] for p in book_paths])
                # One metadata entry per path of this book
                metadata.extend([mi] * len(book_paths))
            ids = m.add_books(paths, formats, metadata,
                    add_duplicates=True, return_ids=True)[1]
            added_ids |= set(ids)
            num = len(ids)
            count += num

    for tdir in data.values():
        try:
            shutil.rmtree(tdir)
        except Exception:
            # Best effort cleanup of the temporary directories
            pass

    if added_ids and gprefs['auto_add_auto_convert']:
        self.auto_convert.emit(added_ids)

    if count > 0:
        m.books_added(count)
        gui.status_bar.show_message(_(
            'Added %(num)d book(s) automatically from %(src)s') %
            dict(num=count, src=self.worker.path), 2000)
        if hasattr(gui, 'db_images'):
            gui.db_images.beginResetModel()
            gui.db_images.endResetModel()

    if needs_rescan:
        QTimer.singleShot(2000, self.dir_changed)