def process_result(self, group_id, result):
    if result.err:
        mi = self.report_metadata_failure(group_id, result.traceback)
        paths = self.file_groups[group_id]
        has_cover = False
        duplicate_info = set() if self.add_formats_to_existing else False
    else:
        paths, opf, has_cover, duplicate_info = result.value
        try:
            # Parse the OPF produced by the metadata-scan worker
            mi = OPF(BytesIO(opf), basedir=self.tdir, populate_spine=False,
                     try_to_guess_cover=False).to_book_metadata()
            mi.read_metadata_failed = False
        except Exception:
            mi = self.report_metadata_failure(group_id, traceback.format_exc())

    if mi.is_null('title'):
        # Fall back to the first filename for the title
        for path in paths:
            mi.title = os.path.splitext(os.path.basename(path))[0]
            break
    if mi.application_id == '__calibre_dummy__':
        mi.application_id = None
    if gprefs.get('tag_map_on_add_rules'):
        from calibre.ebooks.metadata.tag_mapper import map_tags
        mi.tags = map_tags(mi.tags, gprefs['tag_map_on_add_rules'])
    if self.author_map_rules:
        from calibre.ebooks.metadata.author_mapper import map_authors
        new_authors = map_authors(mi.authors, self.author_map_rules)
        if new_authors != mi.authors:
            mi.authors = new_authors
            if self.db is None:
                mi.author_sort = authors_to_sort_string(mi.authors)
            else:
                mi.author_sort = self.db.author_sort_from_authors(mi.authors)

    self.pd.msg = mi.title

    cover_path = os.path.join(self.tdir, '%s.cdata' % group_id) if has_cover else None

    if self.db is None:
        if paths:
            self.items.append((mi, cover_path, paths))
        return

    if self.add_formats_to_existing:
        identical_book_ids = find_identical_books(mi, self.find_identical_books_data)
        if identical_book_ids:
            try:
                self.merge_books(mi, cover_path, paths, identical_book_ids)
            except Exception:
                a = self.report.append
                a(''), a('-' * 70)
                a(_('Failed to merge the book: ') + mi.title)
                for f in paths:
                    a('\t' + f)
                a(_('With error:')), a(traceback.format_exc())
        else:
            self.add_book(mi, cover_path, paths)
    else:
        if duplicate_info or icu_lower(mi.title or _('Unknown')) in self.added_duplicate_info:
            # Queue possible duplicates for the user to resolve later
            self.duplicates.append((mi, cover_path, paths))
        else:
            self.add_book(mi, cover_path, paths)
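
# `report_metadata_failure` is called above but not shown in this collection.
# A minimal hypothetical sketch, assuming it appends the failure details to
# self.report and returns dummy metadata so the add can proceed; the real
# method on this class may differ.
def report_metadata_failure(self, group_id, details):
    from calibre.ebooks.metadata.book.base import Metadata
    a = self.report.append
    paths = self.file_groups[group_id]
    a(''), a('-' * 70)
    a(_('Failed to read metadata from the file(s):'))
    for f in paths:
        a('\t' + f)
    a(_('With error:')), a(details)
    mi = Metadata(_('Unknown'))
    mi.read_metadata_failed = True  # counterpart of the False set on the success path
    return mi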
def copy_one_book(
        book_id, src_db, dest_db, duplicate_action='add', automerge_action='overwrite',
        preserve_date=True, identical_books_data=None, preserve_uuid=False):
    db = src_db.new_api
    newdb = dest_db.new_api
    with db.safe_read_lock, newdb.write_lock:
        mi = db.get_metadata(book_id, get_cover=True, cover_as_data=True)
        if not preserve_date:
            mi.timestamp = now()
        format_map = {}
        fmts = list(db.formats(book_id, verify_formats=False))
        for fmt in fmts:
            path = db.format_abspath(book_id, fmt)
            if path:
                format_map[fmt.upper()] = path
        identical_book_list = set()
        new_authors = {k for k, v in iteritems(newdb.get_item_ids('authors', mi.authors)) if v is None}
        new_book_id = None
        return_data = {
            'book_id': book_id, 'title': mi.title, 'authors': mi.authors,
            'author': mi.format_field('authors')[1],
            'action': 'add', 'new_book_id': None
        }
        if duplicate_action != 'add':
            # Scanning for dupes can be slow on a large library so
            # only do it if the option is set
            if identical_books_data is None:
                identical_books_data = newdb.data_for_find_identical_books()
            identical_book_list = find_identical_books(mi, identical_books_data)
            if identical_book_list:  # books with same author and nearly same title exist in newdb
                if duplicate_action == 'add_formats_to_existing':
                    new_book_id = automerge_book(automerge_action, book_id, mi, identical_book_list, newdb, format_map)
                    return_data['action'] = 'automerge'
                    return_data['new_book_id'] = new_book_id
                    postprocess_copy(book_id, new_book_id, new_authors, db, newdb, identical_books_data, duplicate_action)
                else:
                    return_data['action'] = 'duplicate'
                return return_data

        new_book_id = newdb.add_books(
            [(mi, format_map)], add_duplicates=True,
            apply_import_tags=tweaks['add_new_book_tags_when_importing_books'],
            preserve_uuid=preserve_uuid, run_hooks=False)[0][0]
        postprocess_copy(book_id, new_book_id, new_authors, db, newdb, identical_books_data, duplicate_action)
        return_data['new_book_id'] = new_book_id
        return return_data
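
# A minimal usage sketch for copy_one_book, assuming `src_db` and `dest_db`
# are open calibre database objects and `book_ids` is an iterable of ids in
# the source library. Computing identical_books_data once up front avoids
# re-scanning the destination library for every book, which is why the
# function accepts it as a parameter.
def copy_books_example(book_ids, src_db, dest_db):
    identical_books_data = dest_db.new_api.data_for_find_identical_books()
    for book_id in book_ids:
        result = copy_one_book(
            book_id, src_db, dest_db,
            duplicate_action='add_formats_to_existing',
            automerge_action='overwrite',
            identical_books_data=identical_books_data)
        if result['action'] == 'duplicate':
            print('Not copied, duplicate of an existing book:', result['title'])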
def test_find_identical_books(self):  # {{{
    ' Test find_identical_books '
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.db.utils import find_identical_books
    # 'find_identical_books': [(,), (Metadata('unknown'),), (Metadata('xxxx'),)],
    cache = self.init_cache(self.library_path)
    cache.set_field('languages', {1: ('fra', 'deu')})
    data = cache.data_for_find_identical_books()
    lm = cache.get_metadata(1)
    lm2 = cache.get_metadata(1)
    lm2.languages = ['eng']
    for mi, books in (
            (Metadata('title one', ['author one']), {2}),
            (Metadata(_('Unknown')), {3}),
            (Metadata('title two', ['author one']), {1}),
            (lm, {1}),
            (lm2, set()),
    ):
        self.assertEqual(books, cache.find_identical_books(mi))
        self.assertEqual(books, find_identical_books(mi, data))
def test_find_identical_books(self):  # {{{
    ' Test find_identical_books '
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.db.utils import find_identical_books
    # 'find_identical_books': [(,), (Metadata('unknown'),), (Metadata('xxxx'),)],
    cache = self.init_cache(self.library_path)
    cache.set_field('languages', {1: ('fra', 'deu')})
    data = cache.data_for_find_identical_books()
    lm = cache.get_metadata(1)
    lm2 = cache.get_metadata(1)
    lm2.languages = ['eng']
    for mi, books in (
            (Metadata('title one', ['author one']), {2}),
            (Metadata('Unknown', ['Unknown']), {3}),
            (Metadata('title two', ['author one']), {1}),
            (lm, {1}),
            (lm2, set()),
    ):
        self.assertEqual(books, cache.find_identical_books(mi))
        self.assertEqual(books, find_identical_books(mi, data))
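
# Conceptual sketch (not calibre's implementation) of the matching rule the
# tests above pin down: a candidate matches on a normalized title plus the
# same author set, and is rejected when both sides specify languages that
# differ (the lm2 case). The data layout here is invented for illustration.
def toy_find_identical_books(mi, books):
    # books: {book_id: (title, frozenset_of_lowercased_authors, tuple_of_languages)}
    def norm(s):
        return ' '.join((s or '').lower().split())
    matches = set()
    for book_id, (title, authors, langs) in books.items():
        if norm(title) != norm(mi.title):
            continue
        if authors != frozenset(a.lower() for a in (mi.authors or ())):
            continue
        # A language conflict disqualifies only when both sides declare languages
        if langs and mi.languages and tuple(mi.languages) != tuple(langs):
            continue
        matches.add(book_id)
    return matches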
def do_adding(db, request_id, notify_changes, is_remote, mi, format_map, add_duplicates, oautomerge):
    identical_book_list, added_ids, updated_ids = set(), set(), set()
    duplicates = []
    identical_books_data = None

    def add_format(book_id, fmt):
        db.add_format(book_id, fmt, format_map[fmt], replace=True, run_hooks=False)
        updated_ids.add(book_id)

    def add_book():
        nonlocal added_ids
        added_ids_, duplicates_ = db.add_books([(mi, format_map)], add_duplicates=True, run_hooks=False)
        added_ids |= set(added_ids_)
        duplicates.extend(duplicates_)

    if oautomerge != 'disabled' or not add_duplicates:
        identical_books_data = cached_identical_book_data(db, request_id)
        identical_book_list = find_identical_books(mi, identical_books_data)
    if oautomerge != 'disabled':
        if identical_book_list:
            needs_add = False
            duplicated_formats = set()
            for book_id in identical_book_list:
                book_formats = {q.upper() for q in db.formats(book_id)}
                input_formats = {q.upper(): q for q in format_map}
                common_formats = book_formats & set(input_formats)
                if not common_formats:
                    for x in input_formats:
                        add_format(book_id, input_formats[x])
                else:
                    new_formats = set(input_formats) - book_formats
                    if new_formats:
                        for x in new_formats:
                            add_format(book_id, input_formats[x])
                    if oautomerge == 'overwrite':
                        # Replace the formats that already exist on the book
                        for x in common_formats:
                            add_format(book_id, input_formats[x])
                    elif oautomerge == 'ignore':
                        # Leave existing formats alone, report the clashes
                        for x in common_formats:
                            duplicated_formats.add(input_formats[x])
                    elif oautomerge == 'new_record':
                        needs_add = True
            if needs_add:
                add_book()
            if duplicated_formats:
                duplicates.append((mi, {x: format_map[x] for x in duplicated_formats}))
        else:
            add_book()
    else:
        if identical_book_list:
            duplicates.append((mi, format_map))
        else:
            add_book()
    if added_ids and identical_books_data is not None:
        # Keep the cached duplicate-detection data in sync with the new books
        for book_id in added_ids:
            db.update_data_for_find_identical_books(book_id, identical_books_data)
    if is_remote:
        notify_changes(books_added(added_ids))
        if updated_ids:
            notify_changes(formats_added({book_id: tuple(format_map) for book_id in updated_ids}))
    db.dump_metadata()
    return added_ids, updated_ids, duplicates
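
# `cached_identical_book_data` is used above but not shown in this collection.
# A hypothetical sketch, assuming it memoizes the expensive
# data_for_find_identical_books() result per request_id so that all books
# added in one request share a single scan; the real helper may cache
# differently.
_identical_data_cache = {}

def cached_identical_book_data(db, request_id):
    data = _identical_data_cache.get(request_id)
    if data is None:
        data = _identical_data_cache[request_id] = db.data_for_find_identical_books()
    return data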
def do_one(self, num, book_id, newdb):
    mi = self.db.get_metadata(book_id, index_is_id=True, get_cover=True, cover_as_data=True)
    if not gprefs['preserve_date_on_ctl']:
        mi.timestamp = now()
    self.progress(num, mi.title)
    fmts = self.db.formats(book_id, index_is_id=True)
    if not fmts:
        fmts = []
    else:
        fmts = fmts.split(',')
    identical_book_list = set()
    paths = []
    for fmt in fmts:
        p = self.db.format(book_id, fmt, index_is_id=True, as_path=True)
        if p:
            paths.append(p)
    try:
        if self.check_for_duplicates:
            # Scanning for dupes can be slow on a large library so
            # only do it if the option is set
            identical_book_list = find_identical_books(mi, self.find_identical_books_data)
            if identical_book_list:  # books with same author and nearly same title exist in newdb
                if prefs['add_formats_to_existing']:
                    self.automerge_book(book_id, mi, identical_book_list, paths, newdb)
                else:
                    # Report duplicates for later processing
                    self.duplicate_ids[book_id] = (mi.title, mi.authors)
                return

        new_authors = {k for k, v in newdb.new_api.get_item_ids('authors', mi.authors).items() if v is None}
        new_book_id = newdb.import_book(
            mi, paths, notify=False, import_hooks=False,
            apply_import_tags=tweaks['add_new_book_tags_when_importing_books'],
            preserve_uuid=self.delete_after)
        if new_authors:
            # Copy author sort and link values for authors that are new to the
            # destination library
            author_id_map = self.db.new_api.get_item_ids('authors', new_authors)
            sort_map, link_map = {}, {}
            for author, aid in author_id_map.items():
                if aid is not None:
                    adata = self.db.new_api.author_data((aid,)).get(aid)
                    if adata is not None:
                        aid = newdb.new_api.get_item_id('authors', author)
                        if aid is not None:
                            asv = adata.get('sort')
                            if asv:
                                sort_map[aid] = asv
                            alv = adata.get('link')
                            if alv:
                                link_map[aid] = alv
            if sort_map:
                newdb.new_api.set_sort_for_authors(sort_map, update_books=False)
            if link_map:
                newdb.new_api.set_link_for_authors(link_map)
        co = self.db.conversion_options(book_id, 'PIPE')
        if co is not None:
            newdb.set_conversion_options(new_book_id, 'PIPE', co)
        if self.check_for_duplicates:
            newdb.new_api.update_data_for_find_identical_books(new_book_id, self.find_identical_books_data)
        self.processed.add(book_id)
    finally:
        # Clean up the temporary format files extracted above
        for path in paths:
            try:
                os.remove(path)
            except OSError:
                pass