def do_one_isbn_add(self):
    """Add one queued ISBN entry to the library, then schedule the next run.

    Pops the first item of ``self.isbn_books``, imports a record that carries
    only its ISBN (plus ``self.isbn_add_tags`` when set) and re-arms a 10 ms
    single-shot timer on this same method. Once the queue is empty, the
    library model is told how many books were added, ``accept()`` is called
    on the progress dialog and a metadata download is started for all the
    ids collected in ``self.add_by_isbn_ids``.

    On any exception ``accept()`` is called on the dialog and the exception
    is re-raised.
    """
    try:
        library_db = self.gui.library_view.model().db
        try:
            entry = self.isbn_books.pop(0)
        except IndexError:
            # Queue drained: report the count and start the metadata download.
            self.gui.library_view.model().books_added(
                self.isbn_add_dialog.value)
            self.isbn_add_dialog.accept()
            edit_action = self.gui.iactions['Edit Metadata']
            edit_action.download_metadata(
                ids=self.add_by_isbn_ids,
                ensure_fields=frozenset(['title', 'authors']))
            return

        book = MetaInformation(None)
        book.isbn = entry['isbn']
        if self.isbn_add_tags:
            book.tags = list(self.isbn_add_tags)
        if entry['path'] is None:
            formats = []
        else:
            formats = [entry['path']]
        new_id = library_db.import_book(book, formats)
        self.add_by_isbn_ids.add(new_id)
        self.isbn_add_dialog.value += 1
        QTimer.singleShot(10, self.do_one_isbn_add)
    except:
        # Cleanup-and-reraise: the dialog must not stay open on failure.
        self.isbn_add_dialog.accept()
        raise
def setUp(self):
    """Create a temporary library and add three test books to it.

    Two metadata objects are built (``m1`` and ``m2``); ``m2`` is added
    twice, so three books are passed to ``add_books``. The metadata objects
    are kept on ``self.m1`` / ``self.m2`` for use by the tests.
    """
    self.tdir = PersistentTemporaryDirectory('_calibre_dbtest')
    self.db = LibraryDatabase2(self.tdir)
    # NOTE(review): the handle is never closed here, and the same open file
    # object (not its path) is handed over for all three books — confirm
    # add_books accepts file-like objects and whether it closes them.
    f = open(os.path.join(self.tdir, 'test.txt'), 'w+b')
    f.write('test')
    paths = list(repeat(f, 3))
    formats = list(repeat('txt', 3))
    m1 = MetaInformation('Test Ebook 1', ['Test Author 1'])
    m1.tags = ['tag1', 'tag2']
    m1.publisher = 'Test Publisher 1'
    m1.rating = 2
    m1.series = 'Test Series 1'
    m1.series_index = 3
    m1.author_sort = 'as1'
    m1.isbn = 'isbn1'
    # self.img is not set in this method; presumably provided elsewhere on
    # the test class — verify.
    m1.cover_data = ('jpg', self.img)
    m2 = MetaInformation('Test Ebook 2', ['Test Author 2'])
    m2.tags = ['tag3', 'tag4']
    m2.publisher = 'Test Publisher 2'
    m2.rating = 3
    m2.series = 'Test Series 2'
    m2.series_index = 1
    # Both books share the same author_sort and isbn values.
    m2.author_sort = 'as1'
    m2.isbn = 'isbn1'
    # m2 is listed twice; add_duplicates=True is passed explicitly.
    self.db.add_books(paths, formats, [m1, m2, m2], add_duplicates=True)
    self.m1, self.m2 = m1, m2
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from a PDF stream.

    The stream is copied to ``src.pdf`` in a temporary directory and the
    ``read_info`` job of ``calibre.ebooks.metadata.pdf`` is run on that
    directory. Title, author, creator, keywords and subject are taken from
    the returned info dict. Keywords that pass ``check_isbn`` supply the ISBN
    and are removed from the tag list.

    :param stream: seekable file-like object holding the PDF
    :param cover: when true, ``cover.jpg`` is read from the work directory
        if the job produced it
    :return: a ``MetaInformation`` object
    :raises RuntimeError: if the worker job fails
    :raises ValueError: if the job returns no info dict
    """
    with TemporaryDirectory('_pdf_metadata_read') as workdir:
        stream.seek(0)
        source_copy = os.path.join(workdir, 'src.pdf')
        with open(source_copy, 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        try:
            job = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (workdir, bool(cover)))
        except WorkerError as err:
            prints(err.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = job['result']
        with open(job['stdout_stderr'], 'rb') as log:
            worker_output = log.read().strip()
            if worker_output:
                prints(worker_output)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        cover_file = os.path.join(workdir, 'cover.jpg')
        cover_bytes = None
        if cover and os.path.exists(cover_file):
            with open(cover_file, 'rb') as image:
                cover_bytes = image.read()

    title = info.get('Title', None)
    raw_author = info.get('Author', None)
    authors = ([_('Unknown')] if raw_author is None
               else string_to_authors(raw_author))
    mi = MetaInformation(title, authors)

    producer = info.get('Creator', None)
    if producer:
        mi.book_producer = producer

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [word.strip() for word in keywords.split(',')]
        isbn = [check_isbn(tag) for tag in mi.tags if check_isbn(tag)]
        if isbn:
            isbn = isbn[0]
            mi.isbn = isbn
        # Drop the keyword(s) that supplied the ISBN from the tag list.
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cover_bytes:
        mi.cover_data = ('jpeg', cover_bytes)
    return mi
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from a PDF stream.

    The stream is copied to ``src.pdf`` in a temporary directory and the
    ``read_info`` job of ``calibre.ebooks.metadata.pdf`` is run on that
    directory. Title, author, creator, keywords and subject are taken from
    the returned info dict. Keywords that pass ``check_isbn`` supply the ISBN
    and are removed from the tag list.

    :param stream: seekable file-like object holding the PDF
    :param cover: when true, ``cover.jpg`` is read from the work directory
        if the job produced it
    :return: a ``MetaInformation`` object
    :raises RuntimeError: if the worker job fails
    :raises ValueError: if the job returns no info dict
    """
    with TemporaryDirectory("_pdf_metadata_read") as workdir:
        stream.seek(0)
        source_copy = os.path.join(workdir, "src.pdf")
        with open(source_copy, "wb") as dest:
            shutil.copyfileobj(stream, dest)
        try:
            job = fork_job("calibre.ebooks.metadata.pdf", "read_info",
                           (workdir, bool(cover)))
        except WorkerError as err:
            prints(err.orig_tb)
            raise RuntimeError("Failed to run pdfinfo")
        info = job["result"]
        with open(job["stdout_stderr"], "rb") as log:
            worker_output = log.read().strip()
            if worker_output:
                prints(worker_output)
        if not info:
            raise ValueError("Could not read info dict from PDF")
        cover_file = os.path.join(workdir, "cover.jpg")
        cover_bytes = None
        if cover and os.path.exists(cover_file):
            with open(cover_file, "rb") as image:
                cover_bytes = image.read()

    title = info.get("Title", None)
    raw_author = info.get("Author", None)
    if raw_author is not None:
        authors = string_to_authors(raw_author)
    else:
        authors = [_("Unknown")]
    mi = MetaInformation(title, authors)

    producer = info.get("Creator", None)
    if producer:
        mi.book_producer = producer

    keywords = info.get("Keywords", None)
    mi.tags = []
    if keywords:
        mi.tags = [word.strip() for word in keywords.split(",")]
        isbn = [check_isbn(tag) for tag in mi.tags if check_isbn(tag)]
        if isbn:
            isbn = isbn[0]
            mi.isbn = isbn
        # Drop the keyword(s) that supplied the ISBN from the tag list.
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != isbn]

    subject = info.get("Subject", None)
    if subject:
        mi.tags.insert(0, subject)

    if cover_bytes:
        mi.cover_data = ("jpeg", cover_bytes)
    return mi
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from a PDF stream.

    The stream is copied to ``src.pdf`` in a temporary directory and the
    ``read_info`` job of ``calibre.ebooks.metadata.pdf`` is run on that
    directory. Title, author, creator, keywords and subject are taken from
    the returned info dict; the subject, when present, becomes the first tag.

    :param stream: seekable file-like object holding the PDF
    :param cover: when true, ``cover.jpg`` is read from the work directory
        if the job produced it
    :return: a ``MetaInformation`` object
    :raises RuntimeError: if the worker job fails
    :raises ValueError: if the job returns no info dict
    """
    with TemporaryDirectory('_pdf_metadata_read') as workdir:
        stream.seek(0)
        source_copy = os.path.join(workdir, 'src.pdf')
        with open(source_copy, 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        try:
            job = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (workdir, bool(cover)))
        except WorkerError as err:
            prints(err.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = job['result']
        with open(job['stdout_stderr'], 'rb') as log:
            worker_output = log.read().strip()
            if worker_output:
                prints(worker_output)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        cover_file = os.path.join(workdir, 'cover.jpg')
        cover_bytes = None
        if cover and os.path.exists(cover_file):
            with open(cover_file, 'rb') as image:
                cover_bytes = image.read()

    title = info.get('Title', None)
    raw_author = info.get('Author', None)
    authors = ([_('Unknown')] if raw_author is None
               else string_to_authors(raw_author))
    mi = MetaInformation(title, authors)

    producer = info.get('Creator', None)
    if producer:
        mi.book_producer = producer

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [word.strip() for word in keywords.split(',')]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cover_bytes:
        mi.cover_data = ('jpeg', cover_bytes)
    return mi
def get_metadata(stream, extract_cover=True):
    """Read metadata from an ODF document's ``meta.xml``.

    The stream is opened as a zip archive and ``meta.xml`` is parsed with a
    namespace-aware SAX parser feeding ``odfmetaparser``. Standard fields
    (title, creator, description, language, keywords) are read first; if the
    custom field ``opf.metadata`` equals ``'true'``, the ``opf.*`` fields
    override or extend them.

    :param stream: file-like object containing the ODF (zip) document
    :param extract_cover: passed through to ``read_cover``
    :return: a ``MetaInformation`` object
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    # 'initial-creator' wins over 'creator' when it is non-blank.
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    # Unparseable index: fall back to the first position.
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Best effort: do not let an error reading the cover prevent
            # reading other data. Only Exception is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass

    return mi
def do_one_isbn_add(self):
    """Process the next pending ISBN and re-queue this method on a timer.

    Takes the first item of ``self.isbn_books``, imports a record holding
    just the ISBN (and ``self.isbn_add_tags`` if any) together with the
    optional file at the item's ``'path'``, bumps the dialog counter and
    schedules itself again after 10 ms. When no items remain, the model is
    notified via ``books_added``, ``accept()`` is called on the dialog and
    metadata is downloaded for the ids in ``self.add_by_isbn_ids``.

    If anything raises, ``accept()`` is called on the dialog before the
    exception propagates.
    """
    try:
        library_db = self.gui.library_view.model().db
        try:
            pending = self.isbn_books.pop(0)
        except IndexError:
            # Nothing left to add: wrap up and fetch metadata for new books.
            self.gui.library_view.model().books_added(
                self.isbn_add_dialog.value)
            self.isbn_add_dialog.accept()
            metadata_action = self.gui.iactions['Edit Metadata']
            metadata_action.download_metadata(
                ids=self.add_by_isbn_ids,
                ensure_fields=frozenset(['title', 'authors']))
            return

        record = MetaInformation(None)
        record.isbn = pending['isbn']
        if self.isbn_add_tags:
            record.tags = list(self.isbn_add_tags)
        file_path = pending['path']
        formats = [file_path] if file_path is not None else []
        imported_id = library_db.import_book(record, formats)
        self.add_by_isbn_ids.add(imported_id)
        self.isbn_add_dialog.value += 1
        QTimer.singleShot(10, self.do_one_isbn_add)
    except:
        # Cleanup-and-reraise: the dialog must not stay open on failure.
        self.isbn_add_dialog.accept()
        raise
def get_metadata(stream, cpath=None):
    """Read PDF metadata by running the ``read_pdf_metadata`` job.

    The stream is spooled to a temporary file which is handed, together with
    ``cpath``, to a ``ParallelJob`` on a one-worker ``Server``. The job is
    polled every 0.1 s until it finishes.

    :param stream: file-like object holding the PDF
    :param cpath: passed to the job as its second argument; stored on
        ``mi.cover`` when the job reports success for the cover
    :return: a ``MetaInformation`` object
    :raises Unavailable: if ``podofo`` is not available
    :raises ValueError: if the job produced no result
    """
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    try:
        try:
            pt.write(stream.read())
        finally:
            pt.close()
        server = Server(pool_size=1)
        try:
            job = ParallelJob('read_pdf_metadata', 'Read pdf metadata',
                              lambda x, y: x, args=[pt.name, cpath])
            server.add_job(job)
            while not job.is_finished:
                time.sleep(0.1)
                job.update()
            job.update()
        finally:
            # Close the server even if polling the job raised.
            server.close()
        if job.result is None:
            raise ValueError('Failed to read metadata: ' + job.details)
        title, authors, creator, tags, ok = job.result
        if not ok:
            # Single-argument print calls behave the same on Python 2 and 3.
            print('Failed to extract cover:')
            print(job.details)
        if title == '_':
            # '_' is treated as "no title": fall back to the stream's name
            # without its extension.
            title = getattr(stream, 'name', _('Unknown'))
            title = os.path.splitext(title)[0]

        mi = MetaInformation(title, authors)
        if creator:
            mi.book_producer = creator
        if tags:
            mi.tags = tags
        if ok:
            mi.cover = cpath
        return mi
    finally:
        # Remove the spooled copy on every exit path, including failures.
        if os.path.exists(pt.name):
            os.remove(pt.name)
def get_metadata(stream, extract_cover=True):
    """Read metadata from an ODF document's ``meta.xml``.

    The stream is opened as a zip archive and ``meta.xml`` is parsed with a
    namespace-aware SAX parser feeding ``odfmetaparser``. Standard fields
    (title, creator, description, language, keywords) are read first; if the
    custom field ``opf.metadata`` equals ``'true'``, the ``opf.*`` fields
    override or extend them.

    :param stream: file-like object containing the ODF (zip) document
    :param extract_cover: passed through to ``read_cover``
    :return: a ``MetaInformation`` object
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    # Membership is tested with ``in``; dict.has_key() does not exist on
    # Python 3.
    if 'title' in data:
        mi.title = data['title']
    # 'initial-creator' wins over 'creator' when it is non-blank.
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    # Unparseable index: fall back to the first position.
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Best effort: do not let an error reading the cover prevent
            # reading other data. Only Exception is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass

    return mi
def add_annotation_to_library(self, db, db_id, annotation):
    """Store a Kindle annotation in the library.

    For ``kindle_bookmark`` annotations the generated annotation HTML is
    appended to the book's comments (after cutting any previously appended
    annotations block) and the bookmark file is attached as a format. Books
    that already have comments and are tagged 'Catalog' or 'Clippings' are
    left untouched.

    For ``kindle_clippings`` annotations the clippings file is attached to
    the book titled "My Clippings", which is created if the search finds
    none; its comments are set to a "Last modified" stamp.

    :param db: library database object
    :param db_id: id of the book the bookmark belongs to
    :param annotation: object exposing ``type`` and ``value``
    """
    from calibre.ebooks.BeautifulSoup import Tag
    from calibre.ebooks.metadata import MetaInformation

    skip_tags = set(['Catalog', 'Clippings'])
    kind = annotation.type

    if kind == 'kindle_bookmark':
        mi = db.get_metadata(db_id, index_is_id=True)
        notes_soup = self.generate_annotation_html(annotation.value)
        if not mi.comments:
            mi.comments = unicode(notes_soup.prettify())
        else:
            # Both offsets are located before any truncation takes place.
            div_at = mi.comments.find('<div class="user_annotations">')
            hr_at = mi.comments.find('<hr class="annotations_divider" />')
            if div_at >= 0:
                mi.comments = mi.comments[:div_at]
            if hr_at >= 0:
                mi.comments = mi.comments[:hr_at]
            if skip_tags.intersection(set(mi.tags)):
                return
            if mi.comments:
                # Separate the user's own comments from the annotations.
                divider = Tag(notes_soup, 'hr')
                divider['class'] = 'annotations_divider'
                notes_soup.insert(0, divider)
            mi.comments += unicode(notes_soup.prettify())

        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        bookmark = annotation.value
        db.add_format_with_hooks(db_id, bookmark.bookmark_extension,
                                 bookmark.path, index_is_id=True)
    elif kind == 'kindle_clippings':
        # Find 'My Clippings' author=Kindle in database, or add
        stamp = strftime(u'%x %X', annotation.value['timestamp'].timetuple())
        last_update = 'Last modified %s' % stamp
        matches = list(db.data.search_getting_ids(
            'title:"My Clippings"', '', sort_results=False))
        if not matches:
            mi = MetaInformation('My Clippings', authors=['Kindle'])
            mi.tags = ['Clippings']
            mi.comments = last_update
            db.add_books([annotation.value['path']], ['txt'], [mi])
        else:
            clippings_id = matches[0]
            db.add_format_with_hooks(clippings_id, 'TXT',
                                     annotation.value['path'],
                                     index_is_id=True)
            mi = db.get_metadata(clippings_id, index_is_id=True)
            mi.comments = last_update
            db.set_metadata(clippings_id, mi)
def _start_splitmerge(self,book_list, tdir=None, db=None):
    """Merge the split files of all 'good' books into one new EPUB book.

    The entries of ``book_list`` whose ``'good'`` value is true are merged by
    the epubmerge plugin into a temporary EPUB. A new library entry is then
    created for it, followed by the GUI refresh, word count, auto-convert and
    FanFicFare list update steps. ``tdir`` is removed at the end.

    :param book_list: dicts with at least 'good', 'title', 'authors',
        'tags' and 'splittmp' keys
    :param tdir: directory for the temporary merge file; removed afterwards
    :param db: library database used to create the new entry
    """
    merger = self.get_epubmerge_plugin()
    # The split plugin is still looked up, although its handle is not used.
    self.get_epubsplit_plugin()

    usable = [book for book in book_list if book['good']]
    merged_file = PersistentTemporaryFile(prefix='merge-',
                                          suffix='.epub',
                                          dir=tdir)
    if len(usable) == 1:
        only = usable[0]
        title, authors = "New "+only['title'], only['authors']
    else:
        title, authors = "New Chapters Anthology", ["Various Authors"]

    mi = MetaInformation(title, authors)
    # Flatten every book's tag list into one list.
    mi.tags = [tag for book in usable for tag in book['tags']]
    mi.comments = "<p>New Chapters from:</p>"
    credit_lines = ["%s by %s"%(book['title'], ", ".join(book['authors']))
                    for book in usable]
    mi.comments += '<br/>'.join(credit_lines)

    merger.do_merge(merged_file,
                    [book['splittmp'] for book in usable],
                    authoropts=mi.authors,
                    titleopt=mi.title,
                    descopt=mi.comments,
                    tags=mi.tags,
                    keepmetadatafiles=False,
                    )

    book_id = db.create_book_entry(mi, add_duplicates=True)
    db.add_format_with_hooks(book_id, 'EPUB', merged_file, index_is_id=True)

    library_model = self.gui.library_view.model()
    library_model.books_added(1)
    self.gui.library_view.model().refresh_ids([book_id])
    self.gui.tags_view.recount()

    ## run word counts
    self.gui.iactions['Count Pages'].count_statistics([book_id],['WordCount'])

    ## run auto convert
    self.gui.iactions['Convert Books'].auto_convert_auto_add([book_id])

    ## add to FFF update lists
    self.gui.library_view.select_rows([book_id])
    self.gui.iactions['FanFicFare'].update_lists()

    remove_dir(tdir)
def do_set_metadata(opts, mi, stream, stream_type):
    """Apply command-line metadata options to ``mi`` and write it to ``stream``.

    A copy of ``mi`` is made, optionally updated from the OPF file named by
    ``opts.from_opf``, then overridden by every option that is set on
    ``opts``. The result is written with ``set_metadata`` inside the
    ``force_identifiers`` context.

    :param opts: parsed options object; unset options are ``None``
    :param mi: metadata to start from
    :param stream: book stream to write the metadata into
    :param stream_type: format identifier passed to ``set_metadata``
    """
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # Use a context manager so the OPF file handle is always closed.
        with open(from_opf, 'rb') as opf_file:
            opf_mi = OPF(opf_file).to_book_metadata()
        mi.smart_update(opf_mi)

    # Options that are handled individually below or are not metadata fields.
    handled_separately = ('to_opf', 'from_opf', 'authors', 'title_sort',
                          'author_sort', 'get_cover', 'cover', 'tags',
                          'lrf_bookid', 'identifiers')
    for pref in config().option_set.preferences:
        if pref.name in handled_separately:
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)

    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    # An explicit author_sort overrides the one derived from authors.
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'identifiers', None):
        # Each entry is split at its first ':' into (type, value).
        val = {k.strip(): v.strip() for k, v in
               (x.partition(':')[0::2] for x in opts.identifiers)}
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            # Entries with an empty type or value are dropped. items() works
            # on both Python 2 and 3, unlike iteritems().
            val = {k: v for k, v in orig.items() if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        # Read the cover through a context manager so the handle is closed.
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
def get_clippings_cid(self, title):
    '''
    Find or create cid for title

    Searches the current database for a book with the given title that is
    tagged 'Clippings' and returns the first hit. If the lookup fails for
    any reason (including no hit), a new entry with author 'Various' and tag
    'Clippings' is created and the value returned by ``create_book_entry``
    is returned instead.
    '''
    db = self.parent.opts.gui.current_db
    try:
        cid = list(db.data.parse('title:"%s" and tag:Clippings' % title))[0]
    except Exception:
        # Still deliberately broad (an empty result raises IndexError; a
        # failing search also lands here), but KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        mi = MetaInformation(title, authors=['Various'])
        mi.tags = ['Clippings']
        cid = db.create_book_entry(mi, cover=None, add_duplicates=False,
                                   force_id=None)
    return cid
def do_set_metadata(opts, mi, stream, stream_type):
    """Apply command-line metadata options to ``mi`` and write it to ``stream``.

    A copy of ``mi`` is made, optionally updated from the OPF file named by
    ``opts.from_opf``, then overridden by every option that is set on
    ``opts``. The result is written with ``set_metadata`` inside the
    ``force_identifiers`` context.

    :param opts: parsed options object; unset options are ``None``
    :param mi: metadata to start from
    :param stream: book stream to write the metadata into
    :param stream_type: format identifier passed to ``set_metadata``
    """
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # Use a context manager so the OPF file handle is always closed.
        with open(from_opf, 'rb') as opf_file:
            opf_mi = OPF(opf_file).to_book_metadata()
        mi.smart_update(opf_mi)

    # Options that are handled individually below or are not metadata fields.
    handled_separately = ('to_opf', 'from_opf', 'authors', 'title_sort',
                          'author_sort', 'get_cover', 'cover', 'tags',
                          'lrf_bookid', 'identifiers')
    for pref in config().option_set.preferences:
        if pref.name in handled_separately:
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)

    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    # An explicit author_sort overrides the one derived from authors.
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'identifiers', None):
        # Each entry is split at its first ':' into (type, value).
        val = {k.strip():v.strip() for k, v in
               (x.partition(':')[0::2] for x in opts.identifiers)}
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            # Entries with an empty type or value are dropped.
            val = {k:v for k, v in iteritems(orig) if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        # Read the cover through a context manager so the handle is closed.
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
def get_metadata(f):
    """Read metadata from an LRX file.

    The file must start with a box whose type field is ``ftypLRX2``. Boxes
    are skipped by their big-endian size until one tagged ``bbeb`` is found;
    the compressed XML info block inside it is then decompressed and parsed
    for title, author, publisher, categories and language.

    :param f: seekable file-like object
    :return: a ``MetaInformation`` object
    :raises ValueError: if the file is not a (supported, well-formed) LRX file
    """
    def read(at, amount):
        return _read(f, at, amount)

    f.seek(0)
    buf = f.read(12)
    if buf[4:] == 'ftypLRX2':
        offset = 0
        while True:
            box_size = word_be(buf[:4])
            if box_size == 0:
                # A zero size would never advance the offset, so the same
                # header would be read again forever.
                raise ValueError('Not a valid LRX file')
            offset += box_size
            try:
                buf = read(offset, 8)
            except Exception:
                raise ValueError('Not a valid LRX file')
            if buf[4:] == 'bbeb':
                break
        offset += 8
        buf = read(offset, 16)
        if buf[:8].decode('utf-16-le') != 'LRF\x00':
            raise ValueError('Not a valid LRX file')
        lrf_version = word_le(buf[8:12])
        offset += 0x4c
        compressed_size = short_le(read(offset, 2))
        offset += 2
        if lrf_version >= 800:
            offset += 6
        compressed_size -= 4
        uncompressed_size = word_le(read(offset, 4))
        # Continues from the current file position, i.e. right after the
        # 4-byte size field fetched by the line above.
        info = decompress(f.read(compressed_size))
        if len(info) != uncompressed_size:
            raise ValueError('LRX file has malformed metadata section')
        root = etree.fromstring(info)
        bi = root.find('BookInfo')
        title = bi.find('Title')
        title_sort = title.get('reading', None)
        title = title.text
        author = bi.find('Author')
        author_sort = author.get('reading', None)
        mi = MetaInformation(title, string_to_authors(author.text))
        mi.title_sort, mi.author_sort = title_sort, author_sort
        publisher = bi.find('Publisher')
        # Publisher is optional: a missing element yields None.
        mi.publisher = getattr(publisher, 'text', None)
        mi.tags = [x.text for x in bi.findall('Category')]
        mi.language = root.find('DocInfo').find('Language').text
        return mi

    # NOTE(review): buf[4:8] is a slice of up to 4 items compared with a
    # 3-character literal, so this only matches when exactly 7 bytes were
    # read — confirm the intended magic for Librie files.
    elif buf[4:8] == 'LRX':
        raise ValueError('Librie LRX format not supported')
    else:
        raise ValueError('Not a LRX file')
def get_metadata(f):
    """Read metadata from an LRX file.

    The file must start with a box whose type field is ``ftypLRX2``. Boxes
    are skipped by their big-endian size until one tagged ``bbeb`` is found;
    the compressed XML info block inside it is then decompressed and parsed
    for title, author, publisher, categories and language.

    :param f: seekable binary file-like object
    :return: a ``MetaInformation`` object
    :raises ValueError: if the file is not a (supported, well-formed) LRX file
    """
    def read(at, amount):
        return _read(f, at, amount)

    f.seek(0)
    buf = f.read(12)
    if buf[4:] == b'ftypLRX2':
        offset = 0
        while True:
            box_size = word_be(buf[:4])
            if box_size == 0:
                # A zero size would never advance the offset, so the same
                # header would be read again forever.
                raise ValueError('Not a valid LRX file')
            offset += box_size
            try:
                buf = read(offset, 8)
            except Exception:
                raise ValueError('Not a valid LRX file')
            if buf[4:] == b'bbeb':
                break
        offset += 8
        buf = read(offset, 16)
        if buf[:8].decode('utf-16-le') != 'LRF\x00':
            raise ValueError('Not a valid LRX file')
        lrf_version = word_le(buf[8:12])
        offset += 0x4c
        compressed_size = short_le(read(offset, 2))
        offset += 2
        if lrf_version >= 800:
            offset += 6
        compressed_size -= 4
        uncompressed_size = word_le(read(offset, 4))
        # Continues from the current file position, i.e. right after the
        # 4-byte size field fetched by the line above.
        info = decompress(f.read(compressed_size))
        if len(info) != uncompressed_size:
            raise ValueError('LRX file has malformed metadata section')
        root = safe_xml_fromstring(info)
        bi = root.find('BookInfo')
        title = bi.find('Title')
        title_sort = title.get('reading', None)
        title = title.text
        author = bi.find('Author')
        author_sort = author.get('reading', None)
        mi = MetaInformation(title, string_to_authors(author.text))
        mi.title_sort, mi.author_sort = title_sort, author_sort
        publisher = bi.find('Publisher')
        # Publisher is optional: a missing element yields None.
        mi.publisher = getattr(publisher, 'text', None)
        mi.tags = [x.text for x in bi.findall('Category')]
        mi.language = root.find('DocInfo').find('Language').text
        return mi

    # NOTE(review): buf[4:8] is a slice of up to 4 bytes compared with a
    # 3-byte literal, so this only matches when exactly 7 bytes were read —
    # confirm the intended magic for Librie files.
    elif buf[4:8] == b'LRX':
        raise ValueError('Librie LRX format not supported')
    else:
        raise ValueError('Not a LRX file')
def add_annotation_to_library(self, db, db_id, annotation):
    """Store a Kindle annotation in the library.

    For ``kindle_bookmark`` annotations the generated annotation HTML is
    appended to the book's comments (after cutting any previously appended
    annotations block) and the bookmark file is attached as a format. Books
    that already have comments and are tagged 'Catalog' or 'Clippings' are
    left untouched.

    For ``kindle_clippings`` annotations the clippings file is attached to
    the book titled "My Clippings", which is created if the search finds
    none; its comments are set to a "Last modified" stamp.

    :param db: library database object
    :param db_id: id of the book the bookmark belongs to
    :param annotation: object exposing ``type`` and ``value``
    """
    from calibre.ebooks.BeautifulSoup import Tag
    from calibre.ebooks.metadata import MetaInformation

    skip_tags = set(['Catalog', 'Clippings'])
    kind = annotation.type

    if kind == 'kindle_bookmark':
        mi = db.get_metadata(db_id, index_is_id=True)
        notes_soup = self.generate_annotation_html(annotation.value)
        if not mi.comments:
            mi.comments = unicode(notes_soup.prettify())
        else:
            # Both offsets are located before any truncation takes place.
            div_at = mi.comments.find('<div class="user_annotations">')
            hr_at = mi.comments.find('<hr class="annotations_divider" />')
            if div_at >= 0:
                mi.comments = mi.comments[:div_at]
            if hr_at >= 0:
                mi.comments = mi.comments[:hr_at]
            if skip_tags.intersection(set(mi.tags)):
                return
            if mi.comments:
                # Separate the user's own comments from the annotations.
                divider = Tag(notes_soup,'hr')
                divider['class'] = 'annotations_divider'
                notes_soup.insert(0, divider)
            mi.comments += unicode(notes_soup.prettify())

        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        bookmark = annotation.value
        db.add_format_with_hooks(db_id, bookmark.bookmark_extension,
                                 bookmark.path, index_is_id=True)
    elif kind == 'kindle_clippings':
        # Find 'My Clippings' author=Kindle in database, or add
        stamp = strftime(u'%x %X', annotation.value['timestamp'].timetuple())
        last_update = 'Last modified %s' % stamp
        matches = list(db.data.search_getting_ids(
            'title:"My Clippings"', '', sort_results=False))
        if not matches:
            mi = MetaInformation('My Clippings', authors=['Kindle'])
            mi.tags = ['Clippings']
            mi.comments = last_update
            db.add_books([annotation.value['path']], ['txt'], [mi])
        else:
            clippings_id = matches[0]
            db.add_format_with_hooks(clippings_id, 'TXT',
                                     annotation.value['path'],
                                     index_is_id=True)
            mi = db.get_metadata(clippings_id, index_is_id=True)
            mi.comments = last_update
            db.set_metadata(clippings_id, mi)
def do_add_empty(db, title, authors, isbn, tags, series, series_index):
    """Add a book record without any format files to ``db``.

    Only the arguments that are set are copied onto the new metadata object:
    ``title`` when it is not ``None``, the others when they are truthy.
    ``series_index`` is used only together with ``series``. After the import,
    ``write_dirtied(db)`` and ``send_message()`` are called.
    """
    from calibre.ebooks.metadata import MetaInformation

    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    optional_fields = (('authors', authors), ('isbn', isbn), ('tags', tags))
    for field, value in optional_fields:
        if value:
            setattr(mi, field, value)
    if series:
        mi.series = series
        mi.series_index = series_index
    db.import_book(mi, [])
    write_dirtied(db)
    send_message()
def add_annotation_to_library(self, db, db_id, annotation):
    """Store a Kindle annotation in the library.

    For ``kindle_bookmark`` annotations the generated annotation HTML is
    appended to the book's comments (after cutting any previously appended
    annotations block) and the bookmark file is attached as a format. Books
    that already have comments and are tagged 'Catalog' or 'Clippings' are
    left untouched.

    For ``kindle_clippings`` annotations the clippings file is attached to
    the book titled "My Clippings", which is created if the search finds
    none; its comments are set to a "Last modified" stamp.

    :param db: library database object
    :param db_id: id of the book the bookmark belongs to
    :param annotation: object exposing ``type`` and ``value``
    """
    from calibre.ebooks.BeautifulSoup import Tag
    from calibre.ebooks.metadata import MetaInformation

    skip_tags = set(["Catalog", "Clippings"])
    kind = annotation.type

    if kind == "kindle_bookmark":
        mi = db.get_metadata(db_id, index_is_id=True)
        notes_soup = self.generate_annotation_html(annotation.value)
        if not mi.comments:
            mi.comments = unicode(notes_soup.prettify())
        else:
            # Both offsets are located before any truncation takes place.
            div_at = mi.comments.find('<div class="user_annotations">')
            hr_at = mi.comments.find('<hr class="annotations_divider" />')
            if div_at >= 0:
                mi.comments = mi.comments[:div_at]
            if hr_at >= 0:
                mi.comments = mi.comments[:hr_at]
            if skip_tags.intersection(set(mi.tags)):
                return
            if mi.comments:
                # Separate the user's own comments from the annotations.
                divider = Tag(notes_soup, "hr")
                divider["class"] = "annotations_divider"
                notes_soup.insert(0, divider)
            mi.comments += unicode(notes_soup.prettify())

        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        bookmark = annotation.value
        db.add_format_with_hooks(db_id, bookmark.bookmark_extension,
                                 bookmark.path, index_is_id=True)
    elif kind == "kindle_clippings":
        # Find 'My Clippings' author=Kindle in database, or add
        stamp = strftime(u"%x %X", annotation.value["timestamp"].timetuple())
        last_update = "Last modified %s" % stamp
        matches = list(db.data.search_getting_ids('title:"My Clippings"', ""))
        if not matches:
            mi = MetaInformation("My Clippings", authors=["Kindle"])
            mi.tags = ["Clippings"]
            mi.comments = last_update
            db.add_books([annotation.value["path"]], ["txt"], [mi])
        else:
            clippings_id = matches[0]
            db.add_format_with_hooks(clippings_id, "TXT",
                                     annotation.value["path"],
                                     index_is_id=True)
            mi = db.get_metadata(clippings_id, index_is_id=True)
            mi.comments = last_update
            db.set_metadata(clippings_id, mi)
def add_document(self,document):
    """Add a downloaded Mendeley document to the library as a PDF.

    A metadata object is built from the document's title and authors, tagged
    "Mendeley" and given a ``mendeley`` identifier. The file at
    ``document['path']`` is added to ``self.db`` and then deleted from disk.

    :param document: dict with 'title', 'authors', 'mendeley_id' and 'path'
    """
    from calibre.ebooks.metadata import MetaInformation

    source_path = document['path']

    mi = MetaInformation('', [_('Unknown')])
    mi.title = document['title']
    mi.authors = document['authors']
    mi.tags = ["Mendeley"]
    mi.identifiers = {'mendeley': document['mendeley_id']}
    mi.series_index = 1  # needed?

    self.db.add_books([source_path], ['pdf'], [mi], False, True)
    os.remove(source_path)
def add_document(self, document):
    """Add a downloaded Mendeley document to the library as a PDF.

    A metadata object is built from the document's title and authors, tagged
    "Mendeley" and given a ``mendeley`` identifier. The file at
    ``document['path']`` is added to ``self.db`` and then deleted from disk.

    :param document: dict with 'title', 'authors', 'mendeley_id' and 'path'
    """
    from calibre.ebooks.metadata import MetaInformation

    pdf_path = document['path']
    identifiers = {'mendeley': document['mendeley_id']}

    mi = MetaInformation('', [_('Unknown')])
    mi.title = document['title']
    mi.authors = document['authors']
    mi.tags = ["Mendeley"]
    mi.identifiers = identifiers
    mi.series_index = 1  # needed?

    self.db.add_books([pdf_path], ['pdf'], [mi], False, True)
    os.remove(pdf_path)
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.

    Title, author, category and publisher values that are empty or contain
    the word 'unknown' (case-insensitive) are replaced by C{None}; the same
    applies to a publisher containing 'some publisher'.

    @param stream: A file like object or an instance of L{LRFMetaFile}
    """
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip() + ', ' + lrf.classification.strip()
    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
    if tags:
        mi.tags = tags
    # Both parts were empty, leaving only the separator.
    if mi.category.strip() == ',':
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()
    try:
        mi.title_sort = lrf.title_reading.strip()
        if not mi.title_sort:
            mi.title_sort = None
    except Exception:
        # Best effort: the reading field is optional. KeyboardInterrupt and
        # SystemExit are not swallowed.
        pass
    try:
        mi.author_sort = lrf.author_reading.strip()
        if not mi.author_sort:
            mi.author_sort = None
    except Exception:
        # Best effort, as for title_sort above.
        pass
    if not mi.title or 'unknown' in mi.title.lower():
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if not mi.author or 'unknown' in mi.author.lower():
        mi.author = None
    if not mi.category or 'unknown' in mi.category.lower():
        mi.category = None
    if not mi.publisher or 'unknown' in mi.publisher.lower() or \
            'some publisher' in mi.publisher.lower():
        mi.publisher = None

    return mi
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.

    Title, author, category and publisher values that are empty or contain
    the word 'unknown' (case-insensitive) are replaced by C{None}; the same
    applies to a publisher containing 'some publisher'.

    @param stream: A file like object or an instance of L{LRFMetaFile}
    """
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip()+', '+lrf.classification.strip()
    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
    if tags:
        mi.tags = tags
    # Both parts were empty, leaving only the separator.
    if mi.category.strip() == ',':
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()
    try:
        mi.title_sort = lrf.title_reading.strip()
        if not mi.title_sort:
            mi.title_sort = None
    except Exception:
        # Best effort: the reading field is optional. KeyboardInterrupt and
        # SystemExit are not swallowed.
        pass
    try:
        mi.author_sort = lrf.author_reading.strip()
        if not mi.author_sort:
            mi.author_sort = None
    except Exception:
        # Best effort, as for title_sort above.
        pass
    if not mi.title or 'unknown' in mi.title.lower():
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if not mi.author or 'unknown' in mi.author.lower():
        mi.author = None
    if not mi.category or 'unknown' in mi.category.lower():
        mi.category = None
    if not mi.publisher or 'unknown' in mi.publisher.lower() or \
            'some publisher' in mi.publisher.lower():
        mi.publisher = None

    return mi
def get_metadata(stream):
    """
    Return metadata as a L{MetaInfo} object

    Only streams starting with the RTF magic are inspected; otherwise, or if
    no document info block is found, a record titled 'Unknown' is returned.
    Field values are decoded with the code page detected from the stream.
    """
    stream.seek(0)
    if stream.read(5) != br'{\rtf':
        return MetaInformation(_('Unknown'))
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(_('Unknown'))

    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)

    found = title_pat.search(block)
    title = (_('Unknown') if found is None
             else decode(found.group(1).strip(), cpg))
    found = author_pat.search(block)
    author = None if found is None else decode(found.group(1).strip(), cpg)

    mi = MetaInformation(title)
    if author:
        mi.authors = [name.strip() for name in author.split(',')]

    found = comment_pat.search(block)
    if found is not None:
        mi.comments = decode(found.group(1).strip(), cpg)

    found = tags_pat.search(block)
    if found is not None:
        raw_tags = decode(found.group(1).strip(), cpg)
        stripped = (piece.strip() for piece in raw_tags.split(','))
        # Blank entries are dropped.
        mi.tags = [tag for tag in stripped if tag]

    found = publisher_pat.search(block)
    if found is not None:
        mi.publisher = decode(found.group(1).strip(), cpg)

    return mi
def get_metadata(stream):
    """
    Return metadata as a L{MetaInfo} object

    Only streams starting with the RTF magic are inspected; otherwise, or if
    no document info block is found, a record titled 'Unknown' is returned.
    Field values are decoded with the code page detected from the stream.
    """
    stream.seek(0)
    if stream.read(5) != r'{\rtf':
        return MetaInformation(_('Unknown'))
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(_('Unknown'))

    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)

    found = title_pat.search(block)
    title = (_('Unknown') if found is None
             else decode(found.group(1).strip(), cpg))
    found = author_pat.search(block)
    author = None if found is None else decode(found.group(1).strip(), cpg)

    mi = MetaInformation(title)
    if author:
        mi.authors = string_to_authors(author)

    found = comment_pat.search(block)
    if found is not None:
        mi.comments = decode(found.group(1).strip(), cpg)

    found = tags_pat.search(block)
    if found is not None:
        raw_tags = decode(found.group(1).strip(), cpg)
        stripped = (piece.strip() for piece in raw_tags.split(','))
        # Blank entries are dropped.
        mi.tags = [tag for tag in stripped if tag]

    found = publisher_pat.search(block)
    if found is not None:
        mi.publisher = decode(found.group(1).strip(), cpg)

    return mi
def get_metadata(self):
    """Build a ``MetaInformation`` object from the values in this dialog.

    Title and authors come from ``self.get_title_and_authors()``. Publisher,
    author sort, comments, series and tags are copied only when non-empty;
    the series index is always set.

    :return: a ``MetaInformation`` object
    """
    title, authors = self.get_title_and_authors()
    mi = MetaInformation(title, authors)
    publisher = str(self.publisher.text()).strip()
    if publisher:
        mi.publisher = publisher
    author_sort = str(self.author_sort.text()).strip()
    if author_sort:
        mi.author_sort = author_sort
    comments = self.comment.html
    if comments:
        mi.comments = comments
    mi.series_index = float(self.series_index.value())
    series = str(self.series.currentText()).strip()
    if series:
        mi.series = series
    # ''.split(',') yields [''], so blank entries must be filtered out;
    # otherwise an empty field would always store a single empty tag.
    stripped = (t.strip() for t in str(self.tags.text()).split(','))
    tags = [t for t in stripped if t]
    if tags:
        mi.tags = tags
    return mi
def get_metadata_quick(raw):
    """Read title, authors, creator and keywords from PDF data via podofo.

    :param raw: PDF data passed to ``podofo.PDFDoc.load``
    :return: a ``MetaInformation`` object; the title is ``'_'`` when the
        document has none, and authors default to 'Unknown'
    """
    p = podofo.PDFDoc()
    p.load(raw)
    title = p.title
    if not title:
        # '_' is the placeholder used for a missing title.
        title = '_'
    author = p.author
    authors = string_to_authors(author) if author else [_('Unknown')]
    creator = p.creator
    try:
        stripped = [x.strip() for x in p.keywords.split(u',')]
        tags = [x for x in stripped if x]
    except Exception:
        # Best effort: missing or unreadable keywords simply mean no tags.
        # KeyboardInterrupt and SystemExit are not swallowed.
        tags = []

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    return mi
def do_add_empty(dbctx, title, authors, isbn, tags, series, series_index, cover, identifiers, languages):
    """Add a book record without format files through ``dbctx``.

    Only the arguments that are set are copied onto the new metadata object:
    ``title`` when it is not ``None``, the others when they are truthy.
    ``series_index`` is used only together with ``series``. The ids returned
    by the 'add'/'empty' command are printed.
    """
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if identifiers:
        mi.set_identifiers(identifiers)
    # isbn is applied after the identifiers, as a plain attribute.
    for field, value in (('isbn', isbn), ('tags', tags)):
        if value:
            setattr(mi, field, value)
    if series:
        mi.series = series
        mi.series_index = series_index
    for field, value in (('cover', cover), ('languages', languages)):
        if value:
            setattr(mi, field, value)

    ids, duplicates = dbctx.run('add', 'empty', read_cover(mi))
    id_list = ','.join(map(str, ids))
    prints(_('Added book ids: %s') % id_list)
def get_metadata(self):
    """
    Build a MetaInformation from the current contents of the dialog widgets.

    Title and authors come from ``self.get_title_and_authors()``. Publisher,
    author sort, comments, series and tags are only set when the matching
    widget holds a non-empty value; ``series_index`` is always set.

    Blank tag entries are discarded. Splitting an empty string on ','
    yields ``['']``, which previously caused a single empty tag to be
    stored whenever the tags field was left blank.
    """
    title, authors = self.get_title_and_authors()
    mi = MetaInformation(title, authors)
    publisher = unicode(self.publisher.text()).strip()
    if publisher:
        mi.publisher = publisher
    author_sort = unicode(self.author_sort.text()).strip()
    if author_sort:
        mi.author_sort = author_sort
    comments = self.comment.html
    if comments:
        mi.comments = comments
    mi.series_index = float(self.series_index.value())
    series = unicode(self.series.currentText()).strip()
    if series:
        mi.series = series
    tags = [t.strip() for t in unicode(self.tags.text()).split(',')]
    tags = [t for t in tags if t]
    if tags:
        mi.tags = tags
    return mi
def do_add_empty(
    dbctx, title, authors, isbn, tags, series, series_index, cover,
    identifiers, languages
):
    """
    Create an empty book entry from the given fields and report its ids.

    ``title`` is applied when it is not None; every other field is applied
    only when truthy. The resulting MetaInformation goes through
    ``read_cover`` before being submitted with ``dbctx.run('add', 'empty')``.
    """
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if identifiers:
        mi.set_identifiers(identifiers)
    # Plain attributes assigned after the identifiers, in the same order as
    # before.
    for attr, value in (('isbn', isbn), ('tags', tags)):
        if value:
            setattr(mi, attr, value)
    if series:
        mi.series, mi.series_index = series, series_index
    for attr, value in (('cover', cover), ('languages', languages)):
        if value:
            setattr(mi, attr, value)

    ids, duplicates = dbctx.run('add', 'empty', read_cover(mi))
    prints(_('Added book ids: %s') % ','.join([str(x) for x in ids]))
def get_metadata(stream, cpath=None):
    """
    Read PDF metadata in a worker job and return it as a MetaInformation.

    The stream is copied to a temporary file, which a ``read_pdf_metadata``
    job is given together with ``cpath``. The job result is unpacked as
    ``(title, authors, creator, tags, ok)``; ``cpath`` is stored as
    ``mi.cover`` only when ``ok`` is true.

    Raises Unavailable when podofo is missing and ValueError when the job
    produced no result.

    The worker server and the temporary file are released in ``finally``
    blocks, so they no longer leak when the job fails or raises.
    """
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    try:
        pt.write(stream.read())
        pt.close()
        server = Server(pool_size=1)
        try:
            job = ParallelJob('read_pdf_metadata', 'Read pdf metadata',
                              lambda x, y: x, args=[pt.name, cpath])
            server.add_job(job)
            while not job.is_finished:
                time.sleep(0.1)
                job.update()
            job.update()
        finally:
            server.close()

        if job.result is None:
            raise ValueError('Failed to read metadata: ' + job.details)
        title, authors, creator, tags, ok = job.result
        if not ok:
            # Single-argument print() calls behave the same as the former
            # print statements and are also valid on Python 3.
            print('Failed to extract cover:')
            print(job.details)
        if title == '_':
            # '_' is the placeholder for "no title"; fall back to the
            # stream's file name without its extension.
            title = getattr(stream, 'name', _('Unknown'))
            title = os.path.splitext(title)[0]

        mi = MetaInformation(title, authors)
        if creator:
            mi.book_producer = creator
        if tags:
            mi.tags = tags
    finally:
        if os.path.exists(pt.name):
            os.remove(pt.name)
    if ok:
        mi.cover = cpath
    return mi
def get_metadata(stream):
    """
    Read ``meta.xml`` from an ODF zip stream into a MetaInformation.

    The XML is parsed with a namespace-aware SAX parser feeding an
    ``odfmetaparser``; the fields it collected (``seenfields``) are mapped
    to title, authors ('initial-creator' preferred over 'creator'),
    comments, language and tags.

    Membership tests use ``in`` because ``dict.has_key`` does not exist on
    Python 3. Keywords are stripped and blank entries dropped, so
    "a, b," no longer produces the tags ' b' and ''.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        tags = [x.strip() for x in data['keywords'].split(',')]
        mi.tags = [x for x in tags if x]
    return mi
def populate(self, entries, browser, verbose=False, api_key=''):
    """
    Append one MetaInformation per feed entry to this container.

    For each entry the id URL and title are read first. The id URL is then
    fetched through ``browser`` (with ``?apikey=`` appended when ``api_key``
    is non-empty) to obtain the detailed entry. If that fetch fails, the
    remaining fields are read from the summary entry instead.

    An entry whose id or title cannot be read is reported and skipped.
    Previously the loop carried on and used ``title``/``id_url`` from an
    earlier entry, or raised NameError on the first one.
    """
    for x in entries:
        try:
            id_url = entry_id(x)[0].text
            title = self.get_title(x)
        except Exception:
            report(verbose)
            continue
        mi = MetaInformation(title, self.get_authors(x))
        try:
            if api_key != '':
                id_url = id_url + "?apikey=" + api_key
            raw = browser.open(id_url).read()
            feed = etree.fromstring(raw)
            x = entry(feed)[0]
        except Exception as e:
            # Best effort: keep using the summary entry for the details.
            if verbose:
                print('Failed to get all details for an entry')
                print(e)
        mi.comments = self.get_description(x, verbose)
        mi.tags = self.get_tags(x, verbose)
        mi.isbn = self.get_isbn(x, verbose)
        mi.publisher = self.get_publisher(x, verbose)
        mi.pubdate = self.get_date(x, verbose)
        self.append(mi)
def _do_split(self, db, source_id, misource, splitepub, origlines, newspecs,
              deftitle=None):
    """
    Create a library entry for a split and write the split EPUB into it.

    ``newspecs`` unpacks to ``(linenums, changedtocs, checkedalways)``.
    Nothing happens when ``self.has_lines(linenums)`` is false. Otherwise:

    1. A MetaInformation is built from ``misource`` according to the
       ``copy*`` preferences and stored with ``db.create_book_entry``.
    2. Cover, configured custom columns and the optional source column are
       copied, then committed.
    3. The metadata dialog is shown when ``prefs['editmetadata']`` is set.
    4. The metadata is re-read from the database, the EPUB is written by
       ``splitepub.write_split_epub`` and added as the EPUB format.

    ``self.t`` is refreshed at several points as a timing marker.
    """
    linenums, changedtocs, checkedalways = newspecs
    if not self.has_lines(linenums):
        return
    self.t = time.time()

    # ----- default title / authors -----
    if not deftitle and prefs['copytoctitle']:
        first_line = linenums[0]
        if first_line in changedtocs:
            deftitle = changedtocs[first_line][0]  # already unicode
        elif len(origlines[first_line]['toc']) > 0:
            deftitle = unicode(origlines[first_line]['toc'][0])
    if not deftitle and prefs['copytitle']:
        deftitle = _("%s Split") % misource.title
    defauthors = misource.authors if prefs['copyauthors'] else None

    # ----- metadata copied from the source book -----
    mi = MetaInformation(deftitle, defauthors)
    for pref_key, attr in (
        ('copytags', 'tags'),
        ('copylanguages', 'languages'),
        ('copyseries', 'series'),
        ('copydate', 'timestamp'),
        ('copyrating', 'rating'),
        ('copypubdate', 'pubdate'),
        ('copypublisher', 'publisher'),
    ):
        if prefs[pref_key]:
            setattr(mi, attr, getattr(misource, attr))
    if prefs['copyidentifiers']:
        mi.set_identifiers(misource.get_identifiers())
    if prefs['copycomments'] and misource.comments:
        mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments

    book_id = db.create_book_entry(mi, add_duplicates=True)
    if prefs['copycover'] and misource.has_cover:
        db.set_cover(book_id, db.cover(source_id, index_is_id=True))
    self.t = time.time()

    # ----- custom columns -----
    custom_columns = self.gui.library_view.model().custom_columns
    for col in prefs['custom_cols']:
        if col not in custom_columns:
            continue  # not an existing column
        col_label = custom_columns[col]['label']
        value = db.get_custom(source_id, label=col_label, index_is_id=True)
        if value:
            db.set_custom(book_id, value, label=col_label, commit=False)
    self.t = time.time()

    sourcecol = prefs['sourcecol']
    if sourcecol != '' and sourcecol in custom_columns \
            and prefs['sourcetemplate']:
        val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                       'EpubSplit Source Template Error',
                                       misource)
        col_label = custom_columns[sourcecol]['label']
        if custom_columns[sourcecol]['datatype'] == 'series':
            # Series-type columns carry the running split number as index.
            val = val + (" [%s]" % self.book_count)
        db.set_custom(book_id, val, label=col_label, commit=False)
    self.book_count = self.book_count + 1
    db.commit()
    self.t = time.time()

    library_view = self.gui.library_view
    library_view.model().books_added(1)
    library_view.select_rows([book_id])
    self.t = time.time()

    editconfig_txt = _(
        'You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.'
    )
    if prefs['editmetadata']:
        # NOTE(review): whitespace inside this message is reproduced as it
        # appears in this file; confirm paragraph breaks against the
        # translation catalog.
        confirm(
            '\n' + _('''The book for the new Split EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows. You can fill in the metadata yourself, or use download metadata for known books. 
If you download or add a cover image, it will be included in the generated EPUB.'''
                     ) + '\n\n' + editconfig_txt + '\n',
            'epubsplit_created_now_edit_again', self.gui)
        self.gui.iactions['Edit Metadata'].edit_metadata(False)

    try:
        QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
        self.t = time.time()
        self.gui.tags_view.recount()
        self.gui.status_bar.show_message(_('Splitting off from EPUB...'),
                                         60000)

        # Re-read: the user may have edited the metadata in the dialog.
        mi = db.get_metadata(book_id, index_is_id=True)
        outputepub = PersistentTemporaryFile(suffix='.epub')

        coverjpgpath = None
        if mi.has_cover:
            # grab the path to the real image.
            coverjpgpath = os.path.join(db.library_path,
                                        db.path(book_id, index_is_id=True),
                                        'cover.jpg')

        outlist = sorted(set(linenums + checkedalways))
        splitepub.write_split_epub(outputepub,
                                   outlist,
                                   changedtocs=changedtocs,
                                   authoropts=mi.authors,
                                   titleopt=mi.title,
                                   descopt=mi.comments,
                                   tags=mi.tags,
                                   languages=mi.languages,
                                   coverjpgpath=coverjpgpath)
        self.t = time.time()
        db.add_format_with_hooks(book_id, 'EPUB', outputepub,
                                 index_is_id=True)
        self.t = time.time()

        self.gui.status_bar.show_message(_('Finished splitting off EPUB.'),
                                         3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
        if self.gui.cover_flow:
            self.gui.cover_flow.dataChanged()
    finally:
        QApplication.restoreOverrideCursor()

    if not prefs['editmetadata']:
        paragraphs = [
            _('<b><u>%s</u> by %s</b> has been created and default metadata filled in.'
              ) % (mi.title, ', '.join(mi.authors)),
            _('EpubSplit now skips the Edit Metadata step by default.'),
            editconfig_txt,
        ]
        confirm('<p>' + '</p><p>'.join(paragraphs) + '</p>',
                'epubsplit_created_now_no_edit_again', self.gui)
def _comment_field_(src, key, same_quote=False):
    """Return the value of ``KEY="..."`` inside an HTML comment, or None.

    With ``same_quote`` the closing quote must be the same character as the
    opening one, so the value may contain the other quote character.
    Without it the value may contain neither quote character.
    """
    if same_quote:
        pat = re.compile(
            r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->' % key, re.DOTALL)
        group = 2
    else:
        pat = re.compile(
            r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->' % key, re.DOTALL)
        group = 1
    match = pat.search(src)
    return match.group(group) if match else None


def _meta_field_(src, names):
    """Return group 1 of the first ``get_meta_regexp_(name)`` match, or None."""
    for name in names:
        match = get_meta_regexp_(name).search(src)
        if match:
            return match.group(1)
    return None


def _lookup_field_(src, key, names, same_quote=False):
    """Look a field up in the comment block first, then in the meta tags."""
    value = _comment_field_(src, key, same_quote)
    if value is None:
        value = _meta_field_(src, names)
    return value


def get_metadata_(src, encoding=None):
    """
    Extract metadata from HTML source into a MetaInformation.

    Each field is looked up first as ``KEY="value"`` inside an HTML comment
    and then in a list of meta tag names (see
    http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9).
    The title additionally falls back to the ``<title>`` element. Only the
    first 150000 characters of the source are searched.

    ``src`` is decoded with ``encoding`` (errors replaced) when it is not
    already unicode, or with ``xml_to_unicode`` when no encoding is given.
    Unparseable dates, series numbers and ratings are ignored.

    The entity pattern is non-greedy and requires a name: the former
    ``&(\\S+)?;`` ran from the first '&' to the last ';' of a whitespace-free
    run (so ``A&amp;B&lt;C`` was treated as one entity) and also matched a
    bare ``&;``.
    """
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, "replace")
    src = src[:150000]  # Searching shouldn't take too long

    ent_pat = re.compile(r"&(\S+?);")

    def unescape(text):
        return ent_pat.sub(entity_to_unicode, text)

    # Title
    title = _lookup_field_(src, 'TITLE',
                           ("DC.title", "DCTERMS.title", "Title"), True)
    if not title:
        match = re.compile("<title>([^<>]+?)</title>",
                           re.IGNORECASE).search(src)
        if match:
            title = match.group(1)

    # Author: only the comment form uses ',' as a separator, so only that
    # form is rewritten to ';'.
    author = _comment_field_(src, 'AUTHOR', True)
    if author is not None:
        author = author.replace(",", ";")
    else:
        author = _meta_field_(src, ("Author", "DC.creator.aut",
                                    "DCTERMS.creator.aut", "DC.creator"))

    # Create MetaInformation with Title and Author
    if title:
        title = unescape(title)
    if author:
        author = unescape(author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = _lookup_field_(
        src, 'PUBLISHER',
        ("Publisher", "DC.publisher", "DCTERMS.publisher"), True)
    if publisher:
        mi.publisher = unescape(publisher)

    # ISBN
    isbn = _lookup_field_(
        src, 'ISBN', ("ISBN", "DC.identifier.ISBN", "DCTERMS.identifier.ISBN"))
    if isbn:
        mi.isbn = re.sub(r"[^0-9xX]", "", isbn)

    # LANGUAGE
    language = _lookup_field_(src, 'LANGUAGE',
                              ("DC.language", "DCTERMS.language"))
    if language:
        mi.language = language

    # PUBDATE
    pubdate = _lookup_field_(src, 'PUBDATE', (
        "Pubdate",
        "Date of publication",
        "DC.date.published",
        "DC.date.publication",
        "DC.date.issued",
        "DCTERMS.issued",
    ))
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            pass  # unparseable date: leave pubdate unset

    # TIMESTAMP
    timestamp = _lookup_field_(src, 'TIMESTAMP', (
        "Timestamp", "Date of creation", "DC.date.created",
        "DC.date.creation", "DCTERMS.created"))
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            pass  # unparseable date: leave timestamp unset

    # SERIES: an index may be embedded as "Name [3]" or given separately
    # under the "Seriesnumber" meta name.
    series = _lookup_field_(src, 'SERIES', ("Series",))
    if series:
        series_index = None
        match = re.compile(r"\[([.0-9]+)\]").search(series)
        if match is not None:
            try:
                series_index = float(match.group(1))
            except ValueError:
                pass  # e.g. "[1.2.3]"
            series = series.replace(match.group(), "").strip()
        mi.series = unescape(series)
        if series_index is None:
            raw_index = _meta_field_(src, ("Seriesnumber",))
            if raw_index is not None:
                try:
                    series_index = float(raw_index)
                except ValueError:
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING: negative values become 0; values above 5 are halved, and
    # dropped to 0 when still above 5 afterwards.
    rating = _lookup_field_(src, 'RATING', ("Rating",))
    if rating:
        try:
            mi.rating = float(rating)
            if mi.rating < 0:
                mi.rating = 0
            if mi.rating > 5:
                mi.rating /= 2.0
            if mi.rating > 5:
                mi.rating = 0
        except Exception:
            pass  # non-numeric rating

    # COMMENTS
    comments = _lookup_field_(src, 'COMMENTS', ("Comments",))
    if comments:
        mi.comments = unescape(comments)

    # TAGS
    tags = _lookup_field_(src, 'TAGS', ("Tags",))
    if tags:
        mi.tags = [x.strip() for x in unescape(tags).split(",")]

    # Ready to return MetaInformation
    return mi
def _do_split(self, db, source_id, misource, splitepub, origlines, newspecs,
              deftitle=None):
    """
    Create a library entry for a split and write the split EPUB into it.

    ``newspecs`` unpacks to ``(linenums, changedtocs, checkedalways)``.
    Nothing happens when ``self.has_lines(linenums)`` is false. Otherwise a
    MetaInformation is built from ``misource`` according to the ``copy*``
    preferences and stored as a new book. Cover and custom columns are
    copied, the metadata dialog is optionally shown, and the EPUB written by
    ``splitepub.write_split_epub`` is added as the book's EPUB format.

    ``self.t`` is refreshed at several points as a timing marker.

    The custom-column loop iterates the keys of ``prefs['custom_cols']``
    directly: the value was never used, and ``.iteritems()`` is not
    available on Python 3 dicts.
    """
    linenums, changedtocs, checkedalways = newspecs
    if not self.has_lines(linenums):
        return
    self.t = time.time()

    defauthors = None
    if not deftitle and prefs['copytoctitle']:
        if linenums[0] in changedtocs:
            deftitle = changedtocs[linenums[0]][0]  # already unicode
        elif len(origlines[linenums[0]]['toc']) > 0:
            deftitle = unicode(origlines[linenums[0]]['toc'][0])
    if not deftitle and prefs['copytitle']:
        deftitle = _("%s Split") % misource.title
    if prefs['copyauthors']:
        defauthors = misource.authors

    mi = MetaInformation(deftitle, defauthors)
    if prefs['copytags']:
        mi.tags = misource.tags
    if prefs['copylanguages']:
        mi.languages = misource.languages
    if prefs['copyseries']:
        mi.series = misource.series
    if prefs['copydate']:
        mi.timestamp = misource.timestamp
    if prefs['copyrating']:
        mi.rating = misource.rating
    if prefs['copypubdate']:
        mi.pubdate = misource.pubdate
    if prefs['copypublisher']:
        mi.publisher = misource.publisher
    if prefs['copyidentifiers']:
        mi.set_identifiers(misource.get_identifiers())
    if prefs['copycomments'] and misource.comments:
        mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments

    book_id = db.create_book_entry(mi, add_duplicates=True)
    if prefs['copycover'] and misource.has_cover:
        db.set_cover(book_id, db.cover(source_id, index_is_id=True))
    self.t = time.time()

    custom_columns = self.gui.library_view.model().custom_columns
    for col in prefs['custom_cols']:
        if col not in custom_columns:
            continue  # not an existing column
        label = custom_columns[col]['label']
        value = db.get_custom(source_id, label=label, index_is_id=True)
        if value:
            db.set_custom(book_id, value, label=label, commit=False)
    self.t = time.time()

    if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
            and prefs['sourcetemplate']:
        val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                       'EpubSplit Source Template Error',
                                       misource)
        label = custom_columns[prefs['sourcecol']]['label']
        if custom_columns[prefs['sourcecol']]['datatype'] == 'series':
            # Series-type columns carry the running split number as index.
            val = val + (" [%s]" % self.book_count)
        db.set_custom(book_id, val, label=label, commit=False)
    self.book_count = self.book_count + 1
    db.commit()
    self.t = time.time()

    self.gui.library_view.model().books_added(1)
    self.gui.library_view.select_rows([book_id])
    self.t = time.time()

    editconfig_txt = _('You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.')
    if prefs['editmetadata']:
        # NOTE(review): whitespace inside this message is reproduced as it
        # appears in this file; confirm paragraph breaks against the
        # translation catalog.
        confirm('\n' + _('''The book for the new Split EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows. You can fill in the metadata yourself, or use download metadata for known books. 
If you download or add a cover image, it will be included in the generated EPUB.''') + '\n\n' +
                editconfig_txt + '\n',
                'epubsplit_created_now_edit_again',
                self.gui)
        self.gui.iactions['Edit Metadata'].edit_metadata(False)

    try:
        QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
        self.t = time.time()
        self.gui.tags_view.recount()
        self.gui.status_bar.show_message(_('Splitting off from EPUB...'),
                                         60000)

        # Re-read: the user may have edited the metadata in the dialog.
        mi = db.get_metadata(book_id, index_is_id=True)
        outputepub = PersistentTemporaryFile(suffix='.epub')

        coverjpgpath = None
        if mi.has_cover:
            # grab the path to the real image.
            coverjpgpath = os.path.join(db.library_path,
                                        db.path(book_id, index_is_id=True),
                                        'cover.jpg')

        outlist = sorted(set(linenums + checkedalways))
        splitepub.write_split_epub(outputepub,
                                   outlist,
                                   changedtocs=changedtocs,
                                   authoropts=mi.authors,
                                   titleopt=mi.title,
                                   descopt=mi.comments,
                                   tags=mi.tags,
                                   languages=mi.languages,
                                   coverjpgpath=coverjpgpath)
        self.t = time.time()
        db.add_format_with_hooks(book_id, 'EPUB', outputepub,
                                 index_is_id=True)
        self.t = time.time()

        self.gui.status_bar.show_message(_('Finished splitting off EPUB.'),
                                         3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
    finally:
        QApplication.restoreOverrideCursor()

    if not prefs['editmetadata']:
        confirm('<p>' +
                '</p><p>'.join([
                    _('<b><u>%s</u> by %s</b> has been created and default metadata filled in.') % (mi.title, ', '.join(mi.authors)),
                    _('EpubSplit now skips the Edit Metadata step by default.'),
                    editconfig_txt]) +
                '</p>',
                'epubsplit_created_now_no_edit_again',
                self.gui)
def get_metadata(stream, cover=True):
    """
    Read PDF metadata (and optionally the cover) through a forked job.

    The stream is copied to ``src.pdf`` in a temporary directory, on which
    the ``read_info`` job of ``calibre.ebooks.metadata.pdf`` is run. Title,
    author, creator, keywords and subject are taken from the returned info
    dict. An ISBN found among the keywords is moved out of the tags, XMP
    metadata is consolidated when present, and the remaining info values
    are scanned for a DOI or ISBN not already identified.

    Raises RuntimeError when the job fails and ValueError when it returns
    no info dict.
    """
    with TemporaryDirectory('_pdf_metadata_read') as workdir:
        stream.seek(0)
        with open(os.path.join(workdir, 'src.pdf'), 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (workdir, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as log:
            raw = log.read().strip()
            if raw:
                prints(raw)
        if info is None:
            raise ValueError('Could not read info dict from PDF')
        covpath = os.path.join(workdir, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as img:
                cdata = img.read()

    title = info.get('Title', None) or _('Unknown')
    au = info.get('Author', None)
    au = [_('Unknown')] if au is None else string_to_authors(au)
    mi = MetaInformation(title, au)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [kw.strip() for kw in keywords.split(',')]
        isbn = [check_isbn(kw) for kw in mi.tags if check_isbn(kw)]
        if isbn:
            mi.isbn = isbn = isbn[0]
        # Drop the keyword that carried the ISBN from the tags.
        mi.tags = [kw for kw in mi.tags if check_isbn(kw) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if 'xmp_metadata' in info:
        from calibre.ebooks.metadata.xmp import consolidate_metadata
        mi = consolidate_metadata(mi, info)

    # Look for recognizable identifiers in the info dict, if they were not
    # found in the XMP metadata
    checkers = {'doi': check_doi, 'isbn': check_isbn}
    for scheme, check_func in iteritems(checkers):
        if scheme in mi.get_identifiers():
            continue
        for k, v in iteritems(info):
            if k == 'xmp_metadata':
                continue
            val = check_func(v)
            if val:
                mi.set_identifier(scheme, val)
                break

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
def _do_split(self, db, source_id, misource, splitepub, newspecs,
              deftitle=None, editmeta=True):
    """
    Create a library entry for a sample split and write its EPUB.

    ``newspecs`` unpacks to ``(linenums, changedtocs)``. A MetaInformation
    is built from ``misource`` according to the ``copy*`` preferences and
    stored as a new book. Cover and custom columns are copied, and the EPUB
    written by ``splitepub.write_split_epub`` is added as the book's EPUB
    format.

    ``editmeta`` is accepted but not used: the confirmation and
    edit-metadata step is disabled in this variant, and no cover path is
    passed to the writer. ``self.t`` is refreshed as a timing marker.

    The custom-column loop iterates the keys of ``prefs['custom_cols']``
    directly: the value was never used, and ``.iteritems()`` is not
    available on Python 3 dicts.
    """
    linenums, changedtocs = newspecs
    self.t = time.time()

    defauthors = None
    if not deftitle and prefs['copytitle']:
        deftitle = _("نمونه %s") % misource.title
    if prefs['copyauthors']:
        defauthors = misource.authors

    mi = MetaInformation(deftitle, defauthors)
    if prefs['copytags']:
        mi.tags = misource.tags
    if prefs['copylanguages']:
        mi.languages = misource.languages
    if prefs['copyseries']:
        mi.series = misource.series
    if prefs['copydate']:
        mi.timestamp = misource.timestamp
    if prefs['copyrating']:
        mi.rating = misource.rating
    if prefs['copypubdate']:
        mi.pubdate = misource.pubdate
    if prefs['copypublisher']:
        mi.publisher = misource.publisher
    if prefs['copyidentifiers']:
        mi.set_identifiers(misource.get_identifiers())
    if prefs['copycomments'] and misource.comments:
        mi.comments = _("Split from:") + "\n\n" + misource.comments

    book_id = db.create_book_entry(mi, add_duplicates=True)
    if prefs['copycover'] and misource.has_cover:
        db.set_cover(book_id, db.cover(source_id, index_is_id=True))
    self.t = time.time()

    custom_columns = self.gui.library_view.model().custom_columns
    for col in prefs['custom_cols']:
        if col not in custom_columns:
            continue  # not an existing column
        label = custom_columns[col]['label']
        value = db.get_custom(source_id, label=label, index_is_id=True)
        if value:
            db.set_custom(book_id, value, label=label, commit=False)
    self.t = time.time()

    if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
            and prefs['sourcetemplate']:
        val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                       'EpubSplit Source Template Error',
                                       misource)
        label = custom_columns[prefs['sourcecol']]['label']
        db.set_custom(book_id, val, label=label, commit=False)
    db.commit()
    self.t = time.time()

    self.gui.library_view.model().books_added(1)
    self.gui.library_view.select_rows([book_id])
    self.t = time.time()

    self.t = time.time()
    self.gui.tags_view.recount()
    self.gui.status_bar.show_message(_('فایل نمونه ساخته شد'), 60000)

    mi = db.get_metadata(book_id, index_is_id=True)
    outputepub = PersistentTemporaryFile(suffix='.epub')

    # The cover image is deliberately not forwarded in this variant.
    coverjpgpath = None

    splitepub.write_split_epub(outputepub,
                               linenums,
                               changedtocs=changedtocs,
                               authoropts=mi.authors,
                               titleopt=mi.title,
                               descopt=mi.comments,
                               tags=mi.tags,
                               languages=mi.languages,
                               coverjpgpath=coverjpgpath)
    self.t = time.time()
    db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True)
    self.t = time.time()

    self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
    self.gui.library_view.model().refresh_ids([book_id])
    self.gui.tags_view.recount()
    current = self.gui.library_view.currentIndex()
    self.gui.library_view.model().current_changed(current, self.previous)
def _start_merge(self, book_list):
    """
    Merge the EPUBs described by ``book_list`` into one new library book.

    If any entry has a false ``'good'`` flag, an error dialog listing the
    ``'error'`` texts is shown and nothing else happens. Otherwise the user
    orders the books (cancelling aborts), and then:

    1. Title, authors, publisher, tags, languages and comments for the new
       book are derived from the inputs and stored with
       ``db.create_book_entry``. The first book's cover is copied.
    2. Each configured custom column is combined according to its action
       ('first', 'last', 'add', 'average', 'averageall', 'and', 'or',
       'newest', 'oldest', 'union', 'concat').
    3. The metadata dialog is shown, the metadata re-read, and
       ``self.do_merge`` writes the merged EPUB, which is added as the new
       book's EPUB format.

    Sequences that are later indexed or passed to ``len()`` are built as
    lists: ``map()``/``filter()`` return lazy iterators on Python 3, which
    made ``len(bad_list)``, ``len(serieslist)`` and ``idslist[idx]`` fail.
    ``.items()`` replaces ``.iteritems()`` for the same reason.
    """
    db = self.gui.current_db
    self.previous = self.gui.library_view.currentIndex()

    # if any bad, bail.
    bad_list = [x for x in book_list if not x['good']]
    if bad_list:
        d = error_dialog(self.gui,
                         _('Cannot Merge Epubs'),
                         _('%s books failed.') % len(bad_list),
                         det_msg='\n'.join([x['error'] for x in bad_list]))
        d.exec_()
        return

    d = OrderEPUBsDialog(
        self.gui,
        _('Order EPUBs to Merge'),
        prefs,
        self.qaction.icon(),
        book_list,
    )
    d.exec_()
    if d.result() != d.Accepted:
        return

    book_list = d.get_books()

    logger.debug("2:%s" % (time.time() - self.t))
    self.t = time.time()

    deftitle = "%s %s" % (book_list[0]['title'], prefs['mergeword'])
    mi = MetaInformation(deftitle, ["Temp Author"])

    # if all same series, use series for name.  But only if all.
    serieslist = [x['series'] for x in book_list if x['series'] is not None]
    if len(serieslist) == len(book_list):
        mi.title = serieslist[0]
        for sr in serieslist:
            if mi.title != sr:
                mi.title = deftitle
                break

    # Authors of all books, de-duplicated, in first-seen order.
    mi.authors = list()
    for book_authors in [x['authors'] for x in book_list]:
        for a in book_authors:
            if a not in mi.authors:
                mi.authors.append(a)

    # set publisher if all from same publisher.
    publishers = set(x['publisher'] for x in book_list)
    if len(publishers) == 1:
        mi.publisher = publishers.pop()

    mi.tags = [tag for x in book_list for tag in x['tags']]
    mi.tags.extend(prefs['mergetags'].split(','))

    mi.languages = [lang for x in book_list for lang in x['languages']]

    mi.series = ''

    # ======================= make book comments =========================
    if len(mi.authors) > 1:
        booktitle = lambda x: _("%s by %s") % (x['title'],
                                               ' & '.join(x['authors']))
    else:
        booktitle = lambda x: x['title']

    mi.comments = (_("%s containing:") + "\n\n") % prefs['mergeword']

    if prefs['includecomments']:
        def bookcomments(x):
            if x['comments']:
                return '<b>%s</b>\n\n%s' % (booktitle(x), x['comments'])
            else:
                return '<b>%s</b>\n' % booktitle(x)

        mi.comments += ('<div class="mergedbook">' +
                        '<hr></div><div class="mergedbook">'.join(
                            [bookcomments(x) for x in book_list]) +
                        '</div>')
    else:
        mi.comments += '\n'.join([booktitle(x) for x in book_list])

    # ======================= make book entry =========================
    book_id = db.create_book_entry(mi, add_duplicates=True)

    # set default cover to same as first book
    coverdata = db.cover(book_list[0]['calibre_id'], index_is_id=True)
    if coverdata:
        db.set_cover(book_id, coverdata)

    # ======================= custom columns ===================
    logger.debug("3:%s" % (time.time() - self.t))
    self.t = time.time()

    # have to get custom from db for each book.
    idslist = [x['calibre_id'] for x in book_list]

    custom_columns = self.gui.library_view.model().custom_columns
    for col, action in prefs['custom_cols'].items():
        if col not in custom_columns:
            logger.debug("%s not an existing column, skipping." % col)
            continue

        coldef = custom_columns[col]
        if action not in permitted_values[coldef['datatype']]:
            logger.debug("%s not a valid column type for %s, skipping." %
                         (col, action))
            continue
        label = coldef['label']

        found = False
        value = None
        idx = None
        if action == 'first':
            idx = 0

        if action == 'last':
            idx = -1

        if action in ['first', 'last']:
            value = db.get_custom(idslist[idx], label=label,
                                  index_is_id=True)
            if coldef['datatype'] == 'series' and value is not None:
                # get the number-in-series, too.
                value = "%s [%s]" % (value,
                                     db.get_custom_extra(idslist[idx],
                                                         label=label,
                                                         index_is_id=True))
            found = True

        if action in ('add', 'average', 'averageall'):
            value = 0.0
            count = 0
            for bid in idslist:
                try:
                    value += db.get_custom(bid, label=label,
                                           index_is_id=True)
                    found = True
                    # only count ones with values unless averageall
                    count += 1
                except Exception:
                    # if not set, it's None and fails.
                    # only count ones with values unless averageall
                    if action == 'averageall':
                        count += 1

            if found and action in ('average', 'averageall'):
                value = value / count

            if coldef['datatype'] == 'int':
                value += 0.5  # so int rounds instead of truncs.

        if action == 'and':
            value = True
            for bid in idslist:
                try:
                    value = value and db.get_custom(bid, label=label,
                                                    index_is_id=True)
                    found = True
                except Exception:
                    # if not set, it's None and fails.
                    pass

        if action == 'or':
            value = False
            for bid in idslist:
                try:
                    value = value or db.get_custom(bid, label=label,
                                                   index_is_id=True)
                    found = True
                except Exception:
                    # if not set, it's None and fails.
                    pass

        if action == 'newest':
            value = None
            for bid in idslist:
                try:
                    ivalue = db.get_custom(bid, label=label,
                                           index_is_id=True)
                    if not value or ivalue > value:
                        value = ivalue
                        found = True
                except Exception:
                    # if not set, it's None and fails.
                    pass

        if action == 'oldest':
            value = None
            for bid in idslist:
                try:
                    ivalue = db.get_custom(bid, label=label,
                                           index_is_id=True)
                    if not value or ivalue < value:
                        value = ivalue
                        found = True
                except Exception:
                    # if not set, it's None and fails.
                    pass

        if action == 'union':
            if not coldef['is_multiple']:
                # Single-valued columns fall through to the concat branch.
                action = 'concat'
            else:
                value = set()
                for bid in idslist:
                    try:
                        value = value.union(
                            db.get_custom(bid, label=label,
                                          index_is_id=True))
                        found = True
                    except Exception:
                        # if not set, it's None and fails.
                        pass

        if action == 'concat':
            value = ""
            for bid in idslist:
                try:
                    value = value + ' ' + db.get_custom(bid, label=label,
                                                        index_is_id=True)
                    found = True
                except Exception:
                    # if not set, it's None and fails.
                    pass
            value = value.strip()

        if found and value is not None:
            db.set_custom(book_id, value, label=label, commit=False)

    db.commit()

    logger.debug("4:%s" % (time.time() - self.t))
    self.t = time.time()

    self.gui.library_view.model().books_added(1)
    self.gui.library_view.select_rows([book_id])

    logger.debug("5:%s" % (time.time() - self.t))
    self.t = time.time()

    confirm('\n' + _('''The book for the new Merged EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
            'epubmerge_created_now_edit_again',
            self.gui)

    self.gui.iactions['Edit Metadata'].edit_metadata(False)

    logger.debug("5:%s" % (time.time() - self.t))
    self.t = time.time()
    self.gui.tags_view.recount()

    totalsize = sum(x['epub_size'] for x in book_list)

    logger.debug("merging %s EPUBs totaling %s" %
                 (len(book_list), gethumanreadable(totalsize)))
    if len(book_list) > 100 or totalsize > 5 * 1024 * 1024:
        # NOTE(review): whitespace inside this message is reproduced as it
        # appears in this file; confirm against the translation catalog.
        confirm('\n' + _('''You're merging %s EPUBs totaling %s. 
Calibre will be locked until the merge is finished.''') % (len(book_list), gethumanreadable(totalsize)),
                'epubmerge_edited_now_merge_again',
                self.gui)

    self.gui.status_bar.show_message(
        _('Merging %s EPUBs...') % len(book_list), 60000)

    # Re-read: the user may have edited the metadata in the dialog.
    mi = db.get_metadata(book_id, index_is_id=True)

    mergedepub = PersistentTemporaryFile(suffix='.epub')
    epubstomerge = [x['epub'] for x in book_list]

    coverjpgpath = None
    if mi.has_cover:
        # grab the path to the real image.
        coverjpgpath = os.path.join(db.library_path,
                                    db.path(book_id, index_is_id=True),
                                    'cover.jpg')

    self.do_merge(mergedepub,
                  epubstomerge,
                  authoropts=mi.authors,
                  titleopt=mi.title,
                  descopt=mi.comments,
                  tags=mi.tags,
                  languages=mi.languages,
                  titlenavpoints=prefs['titlenavpoints'],
                  flattentoc=prefs['flattentoc'],
                  printtimes=True,
                  coverjpgpath=coverjpgpath,
                  keepmetadatafiles=prefs['keepmeta'])

    logger.debug("6:%s" % (time.time() - self.t))
    logger.debug(_("Merge finished, output in:\n%s") % mergedepub.name)
    self.t = time.time()
    db.add_format_with_hooks(book_id, 'EPUB', mergedepub, index_is_id=True)

    logger.debug("7:%s" % (time.time() - self.t))
    self.t = time.time()

    self.gui.status_bar.show_message(
        _('Finished merging %s EPUBs.') % len(book_list), 3000)
    self.gui.library_view.model().refresh_ids([book_id])
    self.gui.tags_view.recount()
    current = self.gui.library_view.currentIndex()
    self.gui.library_view.model().current_changed(current, self.previous)
def convert_comic_md_to_calibre_md(self, comic_metadata):
    '''
    Maps the entries in the comic_metadata to calibre metadata

    Builds a fresh MetaInformation from comic_metadata and stores it in
    self.comic_md_in_calibre_format. Returns immediately (without
    rebuilding) when that attribute is already set.

    comic_metadata is read through these attributes: title, series, issue,
    tags, credits, criticalRating, publisher, language, comments, year,
    month, storyArc, characters, teams, locations, genre, issueCount,
    volume, pageCount, webLink and manga.
    '''
    import unicodedata
    from calibre.ebooks.metadata import MetaInformation
    from calibre.utils.date import parse_only_date
    from datetime import date
    from calibre.utils.localization import calibre_langcode_to_name

    if self.comic_md_in_calibre_format:
        return

    # start with a fresh calibre metadata
    mi = MetaInformation(None, None)
    co = comic_metadata

    # shorten some functions
    role = partial(get_role, credits=co.credits)
    update_field = partial(update_calibre_field, target=mi)

    # Get title, if no title, try to assign series infos
    if co.title:
        mi.title = co.title
    elif co.series:
        mi.title = co.series
        if co.issue:
            mi.title += " " + str(co.issue)
    else:
        mi.title = ""

    # tags
    if co.tags != [] and prefs['import_tags']:
        if prefs['overwrite_calibre_tags']:
            mi.tags = co.tags
        else:
            mi.tags = list(set(self.calibre_metadata.tags + co.tags))

    # simple metadata
    update_field("authors", role(WRITER))
    update_field("series", co.series)
    update_field("rating", co.criticalRating)
    update_field("publisher", co.publisher)

    # special cases
    if co.language:
        update_field("language", calibre_langcode_to_name(co.language))
    if co.comments:
        update_field("comments", co.comments.strip())

    # issue
    if co.issue:
        # float() covers ordinary issue numbers ("12", "1.5").
        # unicodedata.numeric() is only a fallback for a single numeric
        # character that float() rejects (e.g. a vulgar fraction); it raises
        # TypeError for anything that is not exactly one unicode character,
        # so both error types are handled and series_index is left unset
        # when the issue is not numeric at all.
        try:
            mi.series_index = float(co.issue)
        except (TypeError, ValueError):
            try:
                mi.series_index = unicodedata.numeric(co.issue)
            except (TypeError, ValueError):
                pass

    # pub date
    puby = co.year
    pubm = co.month
    if puby is not None:
        try:
            # the day is not taken from the comic metadata; use mid-month,
            # and June when the month is missing
            dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            # best effort: an unusable year/month just leaves pubdate unset
            pass

    # custom columns
    update_column = partial(
        update_custom_column,
        calibre_metadata=mi,
        custom_cols=self.db.field_metadata.custom_field_metadata())

    # artists
    update_column(prefs['penciller_column'], role(PENCILLER))
    update_column(prefs['inker_column'], role(INKER))
    update_column(prefs['colorist_column'], role(COLORIST))
    update_column(prefs['letterer_column'], role(LETTERER))
    update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
    update_column(prefs['editor_column'], role(EDITOR))

    # others
    update_column(prefs['storyarc_column'], co.storyArc)
    update_column(prefs['characters_column'], co.characters)
    update_column(prefs['teams_column'], co.teams)
    update_column(prefs['locations_column'], co.locations)
    update_column(prefs['genre_column'], co.genre)
    ensure_int(co.issueCount, update_column, prefs['count_column'],
               co.issueCount)
    ensure_int(co.volume, update_column, prefs['volume_column'], co.volume)
    if prefs['auto_count_pages']:
        update_column(prefs['pages_column'], self.count_pages())
    else:
        update_column(prefs['pages_column'], co.pageCount)
    if prefs['get_image_sizes']:
        update_column(prefs['image_size_column'], self.get_picture_size())
    if co.webLink:
        # only build the link when there is a target; formatting a missing
        # webLink would store a non-empty '<a href="None">' value
        update_column(prefs['comicvine_column'],
                      '<a href="{}">Comic Vine</a>'.format(co.webLink))
    update_column(prefs['manga_column'], co.manga)

    self.comic_md_in_calibre_format = mi
def convert_comic_md_to_calibre_md(self, comic_metadata):
    '''
    Maps the entries in the comic_metadata to calibre metadata

    Builds a fresh MetaInformation from comic_metadata and stores it in
    self.comic_md_in_calibre_format. Returns immediately (without
    rebuilding) when that attribute is already set.

    comic_metadata is read through these attributes: title, series, issue,
    tags, credits, criticalRating, publisher, language, comments, year,
    month, storyArc, characters, teams, locations, volume and genre.
    '''
    import unicodedata
    from calibre.ebooks.metadata import MetaInformation
    from calibre.utils.date import parse_only_date
    from datetime import date
    from calibre.utils.localization import calibre_langcode_to_name

    if self.comic_md_in_calibre_format:
        return

    # synonyms for artists
    WRITER = ['writer', 'plotter', 'scripter']
    PENCILLER = ['artist', 'penciller', 'penciler', 'breakdowns']
    INKER = ['inker', 'artist', 'finishes']
    COLORIST = ['colorist', 'colourist', 'colorer', 'colourer']
    LETTERER = ['letterer']
    COVER_ARTIST = ['cover', 'covers', 'coverartist', 'cover artist']
    EDITOR = ['editor']

    # start with a fresh calibre metadata
    mi = MetaInformation(None, None)
    co = comic_metadata

    # shorten some functions
    role = partial(get_role, credits=co.credits)
    update_field = partial(update_calibre_field, target=mi)

    # Get title, if no title, try to assign series infos
    if co.title:
        mi.title = co.title
    elif co.series:
        mi.title = co.series
        if co.issue:
            mi.title += " " + str(co.issue)
    else:
        mi.title = ""

    # tags
    if co.tags != [] and prefs['import_tags']:
        if prefs['overwrite_calibre_tags']:
            mi.tags = co.tags
        else:
            mi.tags = list(set(self.calibre_metadata.tags + co.tags))

    # simple metadata
    update_field("authors", role(WRITER))
    update_field("series", co.series)
    update_field("rating", co.criticalRating)
    update_field("publisher", co.publisher)

    # special cases
    if co.language:
        update_field("language", calibre_langcode_to_name(co.language))
    if co.comments:
        update_field("comments", co.comments.strip())

    # issue
    if co.issue:
        # float() covers ordinary issue numbers ("12", "1.5").
        # unicodedata.numeric() is only a fallback for a single numeric
        # character that float() rejects (e.g. a vulgar fraction); it raises
        # TypeError for anything that is not exactly one unicode character
        # (so a two-digit unicode issue must not be sent there first).
        # A non-numeric issue such as "1A" leaves series_index unset
        # instead of aborting the whole conversion.
        try:
            mi.series_index = float(co.issue)
        except (TypeError, ValueError):
            try:
                mi.series_index = unicodedata.numeric(co.issue)
            except (TypeError, ValueError):
                pass

    # pub date
    puby = co.year
    pubm = co.month
    if puby is not None:
        try:
            # the day is not taken from the comic metadata; use mid-month,
            # and June when the month is missing
            dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            # best effort: an unusable year/month just leaves pubdate unset
            pass

    # custom columns
    custom_cols = self.db.field_metadata.custom_field_metadata()
    update_column = partial(update_custom_column, calibre_metadata=mi,
                            custom_cols=custom_cols)

    # artists
    update_column(prefs['penciller_column'], role(PENCILLER))
    update_column(prefs['inker_column'], role(INKER))
    update_column(prefs['colorist_column'], role(COLORIST))
    update_column(prefs['letterer_column'], role(LETTERER))
    update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
    update_column(prefs['editor_column'], role(EDITOR))

    # others
    update_column(prefs['storyarc_column'], co.storyArc)
    update_column(prefs['characters_column'], co.characters)
    update_column(prefs['teams_column'], co.teams)
    update_column(prefs['locations_column'], co.locations)
    update_column(prefs['volume_column'], co.volume)
    update_column(prefs['genre_column'], co.genre)

    self.comic_md_in_calibre_format = mi
def _start_merge(self,book_list,tdir=None):
    '''
    Create the library entry for a merged EPUB and queue the merge itself
    as a background job.

    book_list -- list of dicts, one per input book. Keys read here: 'good',
                 'error', 'title', 'series', 'series_index', 'authors',
                 'publisher', 'tags', 'languages', 'comments',
                 'calibre_id', 'epub' and 'epub_size'.
    tdir      -- passed as dir= to PersistentTemporaryFile for the output
                 EPUB and forwarded to the background job as 'tdir'.

    If any entry has a false 'good' value an error dialog listing the
    'error' strings is shown and nothing else happens. Returns None in all
    cases; returns early when the ordering dialog is not accepted.
    '''
    db=self.gui.current_db
    # remembered on self; not used again inside this method
    self.previous = self.gui.library_view.currentIndex()
    # if any bad, bail.
    bad_list = [ x for x in book_list if not x['good'] ]
    if len(bad_list) > 0:
        d = error_dialog(self.gui,
                         _('Cannot Merge Epubs'),
                         _('%s books failed.')%len(bad_list),
                         det_msg='\n'.join( [ x['error'] for x in bad_list ]))
        d.exec_()
    else:
        # let the user put the books in merge order (or cancel)
        d = OrderEPUBsDialog(self.gui,
                             _('Order EPUBs to Merge'),
                             prefs,
                             self.qaction.icon(),
                             book_list,
                             )
        d.exec_()
        if d.result() != d.Accepted:
            return

        book_list = d.get_books()

        # numbered debug lines log the time spent since the previous step
        logger.debug("2:%s"%(time.time()-self.t))
        self.t = time.time()

        deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword'])
        mi = MetaInformation(deftitle,["Temp Author"])

        # if all same series, use series for name. But only if all.
        serieslist = [ x['series'] for x in book_list if x['series'] != None ]
        if len(serieslist) == len(book_list):
            mi.title = serieslist[0]
            for sr in serieslist:
                if mi.title != sr:
                    mi.title = deftitle;
                    break

        # authors: union of all books' authors, first-seen order, no repeats
        mi.authors = list()
        authorslists = [ x['authors'] for x in book_list ]
        for l in authorslists:
            for a in l:
                if a not in mi.authors:
                    mi.authors.append(a)

        # set publisher if all from same publisher.
        publishers = set([ x['publisher'] for x in book_list ])
        if len(publishers) == 1:
            mi.publisher = publishers.pop()

        # tags: every book's tags concatenated (repeats are not removed
        # here), plus the comma-separated 'mergetags' preference
        tagslists = [ x['tags'] for x in book_list ]
        mi.tags = [item for sublist in tagslists for item in sublist]
        mi.tags.extend(prefs['mergetags'].split(','))

        languageslists = [ x['languages'] for x in book_list ]
        mi.languages = [item for sublist in languageslists for item in sublist]

        mi.series = ''
        if prefs['firstseries'] and book_list[0]['series']:
            mi.series = book_list[0]['series']
            mi.series_index = book_list[0]['series_index']

        # ======================= make book comments =========================

        # only mention the authors per book when the merge has several
        if len(mi.authors) > 1:
            booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors']))
        else:
            booktitle = lambda x : x['title']

        mi.comments = ("<p>"+_("%s containing:")+"</p>") % prefs['mergeword']

        if prefs['includecomments']:
            def bookcomments(x):
                if x['comments']:
                    return '<p><b>%s</b></p>%s'%(booktitle(x),x['comments'])
                else:
                    return '<b>%s</b><br/>'%booktitle(x)

            mi.comments += ('<div class="mergedbook">' +
                            '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) +
                            '</div>')
        else:
            mi.comments += '<br/>'.join( [ booktitle(x) for x in book_list ] )

        # ======================= make book entry =========================

        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        # set default cover to same as first book
        coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True)
        if coverdata:
            db.set_cover(book_id, coverdata)

        # ======================= custom columns ===================

        logger.debug("3:%s"%(time.time()-self.t))
        self.t = time.time()

        # have to get custom from db for each book.
        idslist = [ x['calibre_id'] for x in book_list ]

        custom_columns = self.gui.library_view.model().custom_columns
        # prefs['custom_cols'] maps a column key to the action used to
        # combine that column's values across the merged books
        for col, action in six.iteritems(prefs['custom_cols']):

            if col not in custom_columns:
                logger.debug("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]

            if action not in permitted_values[coldef['datatype']]:
                logger.debug("%s not a valid column type for %s, skipping."%(col,action))
                continue

            label = coldef['label']

            # 'found' records whether any value was obtained; the column is
            # only written at the end when found is set and value is not None
            found = False
            value = None
            idx = None
            if action == 'first':
                idx = 0

            if action == 'last':
                idx = -1

            if action in ['first','last']:
                value = db.get_custom(idslist[idx], label=label, index_is_id=True)
                if coldef['datatype'] == 'series' and value != None:
                    # get the number-in-series, too.
                    value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True))
                found = True

            if action in ('add','average','averageall'):
                value = 0.0
                count = 0
                for bid in idslist:
                    try:
                        value += db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                        # only count ones with values unless averageall
                        count += 1
                    except:
                        # if not set, it's None and fails.
                        # only count ones with values unless averageall
                        if action == 'averageall':
                            count += 1

                if found and action in ('average','averageall'):
                    value = value / count

                if coldef['datatype'] == 'int':
                    value += 0.5 # so int rounds instead of truncs.

            if action == 'and':
                value = True
                for bid in idslist:
                    try:
                        value = value and db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'or':
                value = False
                for bid in idslist:
                    try:
                        value = value or db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'newest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid, label=label, index_is_id=True)
                        if not value or ivalue > value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'oldest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid, label=label, index_is_id=True)
                        if not value or ivalue < value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'union':
                if not coldef['is_multiple']:
                    # single-valued column: falls through to the 'concat'
                    # branch below
                    action = 'concat'
                else:
                    value = set()
                    for bid in idslist:
                        try:
                            value = value.union(db.get_custom(bid, label=label, index_is_id=True))
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

            if action == 'concat':
                value = ""
                for bid in idslist:
                    try:
                        value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass
                value = value.strip()

            if action == 'now':
                value = datetime.now()
                found = True
                logger.debug("now: %s"%value)

            if found and value != None:
                logger.debug("value: %s"%value)
                db.set_custom(book_id,value,label=label,commit=False)

        # single commit for all the commit=False set_custom calls above
        db.commit()

        logger.debug("4:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()

        confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in. 
However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
                'epubmerge_created_now_edit_again',
                self.gui,
                title=_("EpubMerge"),
                show_cancel_button=False)

        # the new entry was selected above, so this edits its metadata
        self.gui.iactions['Edit Metadata'].edit_metadata(False)

        logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()
        self.gui.tags_view.recount()

        totalsize = sum([ x['epub_size'] for x in book_list ])

        logger.debug("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize)))

        confirm('\n'+_('''EpubMerge will be done in a Background job. The merged EPUB will not appear in the Library until finished. You are merging %s EPUBs totaling %s.''')%(len(book_list),gethumanreadable(totalsize)),
                'epubmerge_background_merge_again',
                self.gui,
                title=_("EpubMerge"),
                show_cancel_button=False)

        self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000)

        # re-read the metadata so edits made in the dialog are used
        mi = db.get_metadata(book_id,index_is_id=True)

        mergedepub = PersistentTemporaryFile(prefix="output_",
                                             suffix='.epub',
                                             dir=tdir)
        epubstomerge = [ x['epub'] for x in book_list ]
        epubtitles = {}
        for x in book_list:
            # save titles indexed by epub for reporting from BG
            epubtitles[x['epub']]=_("%s by %s") % (x['title'],' & '.join(x['authors']))

        coverjpgpath = None
        if mi.has_cover:
            # grab the path to the real image.
            coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')

        # job description: module name, function name, and that function's
        # arguments (the options dict and the pool size)
        func = 'arbitrary_n'
        cpus = self.gui.job_manager.server.pool_size
        args = ['calibre_plugins.epubmerge.jobs',
                'do_merge_bg',
                ({'book_id':book_id,
                  'book_count':len(book_list),
                  'tdir':tdir,
                  'outputepubfn':mergedepub.name,
                  'inputepubfns':epubstomerge, # already .name'ed
                  'epubtitles':epubtitles, # for reporting
                  'authoropts':mi.authors,
                  'titleopt':mi.title,
                  'descopt':mi.comments,
                  'tags':mi.tags,
                  'languages':mi.languages,
                  'titlenavpoints':prefs['titlenavpoints'],
                  'originalnavpoints':prefs['originalnavpoints'],
                  'flattentoc':prefs['flattentoc'],
                  'printtimes':True,
                  'coverjpgpath':coverjpgpath,
                  'keepmetadatafiles':prefs['keepmeta']
                  },
                 cpus)]
        desc = _('EpubMerge: %s')%mi.title
        # self.merge_done is handed to the job manager via the Dispatcher
        job = self.gui.job_manager.run_job(
            self.Dispatcher(self.merge_done),
            func,
            args=args,
            description=desc)

        self.gui.jobs_pointer.start()
        self.gui.status_bar.show_message(_('Starting EpubMerge'),3000)
def get_metadata(stream, cover=True):
    '''
    Read metadata from the PDF in stream and return a MetaInformation.

    The stream is copied to src.pdf in a temporary directory and the
    'read_info' job of calibre.ebooks.metadata.pdf is forked on it. When
    cover is true and the job left a cover.jpg there, its bytes are
    attached as ('jpeg', data).

    Raises RuntimeError when the forked job fails with a WorkerError and
    ValueError when the job returns no info dict.
    '''
    with TemporaryDirectory('_pdf_metadata_read') as workdir:
        stream.seek(0)
        with open(os.path.join(workdir, 'src.pdf'), 'wb') as dest:
            shutil.copyfileobj(stream, dest)

        try:
            job_result = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                                  (workdir, bool(cover)))
        except WorkerError as err:
            prints(err.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')

        info = job_result['result']

        # echo whatever the worker wrote to stdout/stderr
        with open(job_result['stdout_stderr'], 'rb') as log:
            worker_output = log.read().strip()
        if worker_output:
            prints(worker_output)

        if not info:
            raise ValueError('Could not read info dict from PDF')

        cdata = None
        cover_file = os.path.join(workdir, 'cover.jpg')
        if cover and os.path.exists(cover_file):
            with open(cover_file, 'rb') as img:
                cdata = img.read()

    title = info.get('Title', None)
    author = info.get('Author', None)
    authors = [_('Unknown')] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    producer = info.get('Creator', None)
    if producer:
        mi.book_producer = producer

    mi.tags = []
    keywords = info.get('Keywords', None)
    if keywords:
        stripped = [kw.strip() for kw in keywords.split(',')]
        checked = [check_isbn(kw) for kw in stripped]
        valid = [isbn for isbn in checked if isbn]
        if valid:
            # the first keyword that validates as an ISBN becomes the
            # book's isbn, and keywords resolving to it are not kept as tags
            mi.isbn = valid[0]
            stripped = [kw for kw, isbn in zip(stripped, checked)
                        if isbn != valid[0]]
        mi.tags = stripped

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if 'xmp_metadata' in info:
        from calibre.ebooks.metadata.xmp import consolidate_metadata
        mi = consolidate_metadata(mi, info)

    # Look for recognizable identifiers in the info dict, if they were not
    # found in the XMP metadata
    checkers = {'doi':check_doi, 'isbn':check_isbn}
    for scheme, check_func in checkers.iteritems():
        if scheme in mi.get_identifiers():
            continue
        for key, candidate in info.iteritems():
            if key == 'xmp_metadata':
                continue
            val = check_func(candidate)
            if val:
                mi.set_identifier(scheme, val)
                break

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
def _find_meta_value_(src, comment_key, meta_names):
    '''
    Look up one metadata field in src.

    First searches for an HTML comment containing comment_key='value' or
    comment_key="value"; the closing quote must be the same character as
    the opening one, so a value may contain the other quote character.
    Failing that, tries get_meta_regexp_() for each name in meta_names, in
    order.

    Returns (value, from_comment); value is None when nothing matched.
    '''
    pat = re.compile(
        r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->' % comment_key, re.DOTALL)
    match = pat.search(src)
    if match:
        return match.group(2), True
    for name in meta_names:
        match = get_meta_regexp_(name).search(src)
        if match:
            return match.group(1), False
    return None, False


def get_metadata_(src, encoding=None):
    '''
    Extract metadata from HTML source and return a MetaInformation.

    src      -- the HTML; anything that is not already unicode is decoded
                with encoding ('replace' error handling) or, when no
                encoding is given, via xml_to_unicode().
    encoding -- optional encoding for a non-unicode src.

    Only the first 150000 characters are searched. Fields read: title,
    author, publisher, ISBN, language, pubdate, timestamp, series (and its
    index), rating, comments and tags. Dates, series indices and ratings
    that cannot be parsed are skipped.
    '''
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, 'replace')

    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    src = src[:150000]  # Searching shouldn't take too long

    # Non-greedy and non-optional: each entity is replaced on its own, even
    # when several occur in one run of non-space text, and a bare "&;" is
    # left alone instead of reaching entity_to_unicode with an empty group.
    ent_pat = re.compile(r'&(\S+?);')

    def unescape(text):
        return ent_pat.sub(entity_to_unicode, text)

    def lookup(comment_key, meta_names):
        return _find_meta_value_(src, comment_key, meta_names)[0]

    # Title
    title = lookup('TITLE', ('DC.title', 'DCTERMS.title', 'Title'))
    if not title:
        pat = re.compile('<title>([^<>]+?)</title>', re.IGNORECASE)
        match = pat.search(src)
        if match:
            title = match.group(1)

    # Author
    author, from_comment = _find_meta_value_(
        src, 'AUTHOR',
        ('Author', 'DC.creator.aut', 'DCTERMS.creator.aut', 'DC.creator'))
    if author and from_comment:
        # only the comment form gets its commas turned into semicolons
        author = author.replace(',', ';')

    # Create MetaInformation with Title and Author
    if title:
        title = unescape(title)
    if author:
        author = unescape(author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = lookup('PUBLISHER',
                       ('Publisher', 'DC.publisher', 'DCTERMS.publisher'))
    if publisher:
        mi.publisher = unescape(publisher)

    # ISBN
    isbn = lookup('ISBN',
                  ('ISBN', 'DC.identifier.ISBN', 'DCTERMS.identifier.ISBN'))
    if isbn:
        mi.isbn = re.sub(r'[^0-9xX]', '', isbn)

    # LANGUAGE
    language = lookup('LANGUAGE', ('DC.language', 'DCTERMS.language'))
    if language:
        mi.language = language

    # PUBDATE
    pubdate = lookup('PUBDATE',
                     ('Pubdate', 'Date of publication', 'DC.date.published',
                      'DC.date.publication', 'DC.date.issued',
                      'DCTERMS.issued'))
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            # best effort: an unparseable date is skipped
            pass

    # TIMESTAMP
    timestamp = lookup('TIMESTAMP',
                       ('Timestamp', 'Date of creation', 'DC.date.created',
                        'DC.date.creation', 'DCTERMS.created'))
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            # best effort: an unparseable date is skipped
            pass

    # SERIES
    series = lookup('SERIES', ('Series',))
    if series:
        # a "[n]" inside the series value carries the series index
        series_index = None
        match = re.search(r'\[([.0-9]+)\]', series)
        if match is not None:
            try:
                series_index = float(match.group(1))
            except (TypeError, ValueError):
                pass
            series = series.replace(match.group(), '').strip()

        mi.series = unescape(series)
        if series_index is None:
            match = get_meta_regexp_("Seriesnumber").search(src)
            if match:
                try:
                    series_index = float(match.group(1))
                except (TypeError, ValueError):
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = lookup('RATING', ('Rating',))
    if rating:
        try:
            value = float(rating)
        except (TypeError, ValueError):
            value = None
        if value is not None:
            if value < 0:
                value = 0
            if value > 5:
                # values above 5 are halved once...
                value /= 2.
            if value > 5:
                # ...and reset to 0 if they are still above 5
                value = 0
            mi.rating = value

    # COMMENTS
    comments = lookup('COMMENTS', ('Comments',))
    if comments:
        mi.comments = unescape(comments)

    # TAGS
    tags = lookup('TAGS', ('Tags',))
    if tags:
        mi.tags = [x.strip() for x in unescape(tags).split(",")]

    # Ready to return MetaInformation
    return mi
def _start_merge(self,book_list):
    '''
    Create the library entry for a merged EPUB, then run the merge
    synchronously and attach the result to that entry as its EPUB format.

    book_list -- list of dicts, one per input book. Keys read here: 'good',
                 'error', 'title', 'series', 'authors', 'publisher',
                 'tags', 'languages', 'comments', 'calibre_id', 'epub' and
                 'epub_size'.

    If any entry has a false 'good' value an error dialog listing the
    'error' strings is shown and nothing else happens. Returns None in all
    cases; returns early when the ordering dialog is not accepted.
    '''
    db=self.gui.current_db
    # current index, passed to current_changed() once the merge is done
    self.previous = self.gui.library_view.currentIndex()
    # if any bad, bail.
    bad_list = filter(lambda x : not x['good'], book_list)
    if len(bad_list) > 0:
        d = error_dialog(self.gui,
                         _('Cannot Merge Epubs'),
                         _('%s books failed.')%len(bad_list),
                         det_msg='\n'.join(map(lambda x : x['error'] , bad_list)))
        d.exec_()
    else:
        # let the user put the books in merge order (or cancel)
        d = OrderEPUBsDialog(self.gui,
                             _('Order EPUBs to Merge'),
                             prefs,
                             self.qaction.icon(),
                             book_list,
                             )
        d.exec_()
        if d.result() != d.Accepted:
            return

        book_list = d.get_books()

        # numbered prints report the time spent since the previous step
        print("2:%s"%(time.time()-self.t))
        self.t = time.time()

        deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword'])
        mi = MetaInformation(deftitle,["Temp Author"])

        # if all same series, use series for name. But only if all.
        serieslist = map(lambda x : x['series'], filter(lambda x : x['series'] != None, book_list))
        if len(serieslist) == len(book_list):
            mi.title = serieslist[0]
            for sr in serieslist:
                if mi.title != sr:
                    mi.title = deftitle;
                    break

        # authors: union of all books' authors, first-seen order, no repeats
        mi.authors = list()
        authorslists = map(lambda x : x['authors'], book_list)
        for l in authorslists:
            for a in l:
                if a not in mi.authors:
                    mi.authors.append(a)

        # set publisher if all from same publisher.
        publishers = set(map(lambda x : x['publisher'], book_list))
        if len(publishers) == 1:
            mi.publisher = publishers.pop()

        # tags: every book's tags concatenated (repeats are not removed
        # here), plus the comma-separated 'mergetags' preference
        tagslists = map(lambda x : x['tags'], book_list)
        mi.tags = [item for sublist in tagslists for item in sublist]
        mi.tags.extend(prefs['mergetags'].split(','))

        languageslists = map(lambda x : x['languages'], book_list)
        mi.languages = [item for sublist in languageslists for item in sublist]

        mi.series = ''

        # ======================= make book comments =========================

        # only mention the authors per book when the merge has several
        if len(mi.authors) > 1:
            booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors']))
        else:
            booktitle = lambda x : x['title']

        mi.comments = (_("%s containing:")+"\n\n") % prefs['mergeword']

        if prefs['includecomments']:
            def bookcomments(x):
                if x['comments']:
                    return '<b>%s</b>\n\n%s'%(booktitle(x),x['comments'])
                else:
                    return '<b>%s</b>\n'%booktitle(x)

            mi.comments += ('<div class="mergedbook">' +
                            '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) +
                            '</div>')
        else:
            mi.comments += '\n'.join( [ booktitle(x) for x in book_list ] )

        # ======================= make book entry =========================

        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        # set default cover to same as first book
        coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True)
        if coverdata:
            db.set_cover(book_id, coverdata)

        # ======================= custom columns ===================

        print("3:%s"%(time.time()-self.t))
        self.t = time.time()

        # have to get custom from db for each book.
        idslist = map(lambda x : x['calibre_id'], book_list)

        custom_columns = self.gui.library_view.model().custom_columns
        # prefs['custom_cols'] maps a column key to the action used to
        # combine that column's values across the merged books
        for col, action in prefs['custom_cols'].iteritems():

            if col not in custom_columns:
                print("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]

            if action not in permitted_values[coldef['datatype']]:
                print("%s not a valid column type for %s, skipping."%(col,action))
                continue

            label = coldef['label']

            # 'found' records whether any value was obtained; the column is
            # only written at the end when found is set and value is not None
            found = False
            value = None
            idx = None
            if action == 'first':
                idx = 0

            if action == 'last':
                idx = -1

            if action in ['first','last']:
                value = db.get_custom(idslist[idx], label=label, index_is_id=True)
                if coldef['datatype'] == 'series' and value != None:
                    # get the number-in-series, too.
                    value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True))
                found = True

            if action in ('add','average','averageall'):
                value = 0.0
                count = 0
                for bid in idslist:
                    try:
                        value += db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                        # only count ones with values unless averageall
                        count += 1
                    except:
                        # if not set, it's None and fails.
                        # only count ones with values unless averageall
                        if action == 'averageall':
                            count += 1

                if found and action in ('average','averageall'):
                    value = value / count

                if coldef['datatype'] == 'int':
                    value += 0.5 # so int rounds instead of truncs.

            if action == 'and':
                value = True
                for bid in idslist:
                    try:
                        value = value and db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'or':
                value = False
                for bid in idslist:
                    try:
                        value = value or db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'newest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid, label=label, index_is_id=True)
                        if not value or ivalue > value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'oldest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid, label=label, index_is_id=True)
                        if not value or ivalue < value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'union':
                if not coldef['is_multiple']:
                    # single-valued column: falls through to the 'concat'
                    # branch below
                    action = 'concat'
                else:
                    value = set()
                    for bid in idslist:
                        try:
                            value = value.union(db.get_custom(bid, label=label, index_is_id=True))
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

            if action == 'concat':
                value = ""
                for bid in idslist:
                    try:
                        value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass
                value = value.strip()

            if found and value != None:
                db.set_custom(book_id,value,label=label,commit=False)

        # single commit for all the commit=False set_custom calls above
        db.commit()

        print("4:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        print("5:%s"%(time.time()-self.t))
        self.t = time.time()

        confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
                'epubmerge_created_now_edit_again',
                self.gui)

        # the new entry was selected above, so this edits its metadata
        self.gui.iactions['Edit Metadata'].edit_metadata(False)

        print("5:%s"%(time.time()-self.t))
        self.t = time.time()
        self.gui.tags_view.recount()

        totalsize = sum(map(lambda x : x['epub_size'], book_list))

        print("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize)))
        # the merge below runs inline, so warn first when it is large
        # (more than 100 books or more than 5*1024*1024 in total size)
        if len(book_list) > 100 or totalsize > 5*1024*1024:
            confirm('\n'+_('''You're merging %s EPUBs totaling %s. 
Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)),
                    'epubmerge_edited_now_merge_again',
                    self.gui)

        self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000)

        # re-read the metadata so edits made in the dialog are used
        mi = db.get_metadata(book_id,index_is_id=True)

        mergedepub = PersistentTemporaryFile(suffix='.epub')
        epubstomerge = map(lambda x : x['epub'] , book_list)

        coverjpgpath = None
        if mi.has_cover:
            # grab the path to the real image.
            coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')

        self.do_merge( mergedepub,
                       epubstomerge,
                       authoropts=mi.authors,
                       titleopt=mi.title,
                       descopt=mi.comments,
                       tags=mi.tags,
                       languages=mi.languages,
                       titlenavpoints=prefs['titlenavpoints'],
                       flattentoc=prefs['flattentoc'],
                       printtimes=True,
                       coverjpgpath=coverjpgpath,
                       keepmetadatafiles=prefs['keepmeta'] )

        print("6:%s"%(time.time()-self.t))
        print(_("Merge finished, output in:\n%s")%mergedepub.name)
        self.t = time.time()
        # attach the merged file to the entry created above
        db.add_format_with_hooks(book_id,
                                 'EPUB',
                                 mergedepub, index_is_id=True)

        print("7:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.status_bar.show_message(_('Finished merging %s EPUBs.')%len(book_list), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
def get_metadata(stream, extract_cover=True):
    """Build a ``MetaInformation`` from the ``meta.xml`` member of a zip archive.

    The archive in *stream* is opened with ``ZipFile`` and its ``meta.xml``
    member is parsed.  The elements named in the module-level ``fields``
    mapping (title, creator, description, language, keywords) are copied
    onto a fresh ``MetaInformation``.  ``user-defined`` entries are collected
    into a dict keyed by their lower-cased name; when that dict holds a
    truthy ``opf.metadata`` entry, the ``opf.*`` values are applied on top of
    whatever the standard elements provided.

    :param stream: handed to ``ZipFile`` and later to ``read_cover``.
    :param extract_cover: forwarded unchanged to ``read_cover``.
    :return: the populated ``MetaInformation`` object.

    Errors raised while opening the archive, reading ``meta.xml`` or parsing
    it are not caught here.  Errors raised by ``read_cover`` are swallowed.
    """
    whitespace_run = re.compile(r'\s+')

    def normalize(text):
        # Collapse each whitespace run to a single space and trim both ends.
        return whitespace_run.sub(' ', text).strip()

    with ZipFile(stream) as zf:
        root = fromstring(zf.read('meta.xml'))

        def find(field):
            # Normalized text of the first element matching *field*, or
            # None when the document has no such element.
            ns, tag = fields[field]
            matches = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns})
            if not matches:
                return None
            text = tostring(
                matches[0], method='text', encoding='unicode', with_tail=False)
            return normalize(text).strip()

        mi = MetaInformation(None, [])

        title = find('title')
        if title:
            mi.title = title

        # The second lookup only runs when the first yields nothing.
        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)

        description = find('description')
        if description:
            mi.comments = description

        lang = find('language')
        if lang:
            if canonicalize_lang(lang):
                mi.languages = [canonicalize_lang(lang)]

        keywords = find('keyword') or find('keywords')
        if keywords:
            # Comma separated list; blank entries are dropped.
            stripped = (piece.strip() for piece in keywords.split(','))
            mi.tags = [piece for piece in stripped if piece]

        # Gather the user-defined entries as {lower-cased name: value}.
        data = {}
        user_defined = root.xpath(
            '//ns0:user-defined', namespaces={'ns0': fields['user-defined'][0]})
        for node in user_defined:
            key = (node.get('{%s}name' % METANS) or '').lower()
            value_type = node.get('{%s}value-type' % METANS) or 'string'
            value = node.text
            if not (key and value):
                continue
            if value_type == 'boolean':
                # Only the literal text 'true' counts as True.
                value = value == 'true'
            data[key] = value

        # opfmeta is also passed on to read_cover() further down.
        opfmeta = bool(data.get('opf.metadata'))
        opfnocover = False
        if opfmeta:
            # The user-defined entries carry opf.* values; apply them.
            title_sort = data.get('opf.titlesort', '')
            if title_sort:
                mi.title_sort = title_sort

            authors = data.get('opf.authors', '')
            if authors:
                mi.authors = string_to_authors(authors)

            author_sort = data.get('opf.authorsort', '')
            if author_sort:
                mi.author_sort = author_sort

            raw_isbn = data.get('opf.isbn', '')
            if raw_isbn:
                isbn = check_isbn(raw_isbn)
                if isbn is not None:
                    mi.isbn = isbn

            publisher = data.get('opf.publisher', '')
            if publisher:
                mi.publisher = publisher

            # NOTE(review): unlike identifiers/rating below, an exception
            # from parse_date() is not caught here and will propagate.
            pubdate = data.get('opf.pubdate', '')
            if pubdate:
                mi.pubdate = parse_date(pubdate, assume_utc=True)

            identifiers = data.get('opf.identifiers')
            if identifiers:
                try:
                    mi.identifiers = json.loads(identifiers)
                except Exception:
                    pass

            rating = data.get('opf.rating')
            if rating:
                try:
                    # Clamp into the 0..10 range.
                    capped = min(float(rating), 10)
                    mi.rating = max(0, capped)
                except Exception:
                    pass

            series = data.get('opf.series', '')
            if series:
                mi.series = series
                # The index is only considered when a series name exists.
                series_index = data.get('opf.seriesindex', '')
                if series_index:
                    try:
                        mi.series_index = float(series_index)
                    except Exception:
                        mi.series_index = 1.0

            opf_lang = data.get('opf.language', '')
            if opf_lang:
                cl = canonicalize_lang(opf_lang)
                if cl:
                    mi.languages = [cl]

            opfnocover = data.get('opf.nocover', False)

        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                # A failure while reading the cover must not throw away the
                # metadata that was already collected.
                pass

    return mi