def test_legacy_adding_books(self): # {{{
    'Test various adding books methods'
    from calibre.ebooks.metadata.book.base import Metadata
    # Two views over the same cloned library: `legacy` from init_legacy and
    # `old` from init_old. The ET objects below receive both.
    legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
    mi = Metadata('Added Book0', authors=('Added Author',))
    with NamedTemporaryFile(suffix='.aff') as f:
        f.write(b'xxx')
        # Flush before handing f.name to the database APIs, which receive
        # only the path.
        f.flush()
        # ET is defined elsewhere; presumably calling an ET instance with
        # (self) runs the named method on both `old` and `legacy` and
        # compares the results -- TODO confirm against ET's definition.
        T = partial(ET, 'add_books', ([f.name], ['AFF'], [mi]), old=old, legacy=legacy)
        T()(self)
        book_id = T(kwargs={'return_ids':True})(self)[1][0]
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
        T(kwargs={'add_duplicates':False})(self)
        mi.title = 'Added Book1'
        mi.uuid = 'uuu'
        T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
        # Without preserve_uuid the two backends are expected to end up with
        # different uuids for the new book; with it they must agree.
        book_id = T()(self)
        self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        book_id = T(kwargs={'preserve_uuid':True})(self)
        self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
    with NamedTemporaryFile(suffix='.opf') as f:
        f.write(b'zzzz')
        f.flush()
        T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
        book_id = T()(self)
        # Importing only an .opf file must leave the book without formats.
        self.assertFalse(legacy.new_api.formats(book_id))
    mi.title = 'Added Book2'
    T = partial(ET, 'create_book_entry', (mi,), old=old, legacy=legacy)
    # NOTE(review): unlike every call above, these three ET objects are
    # constructed but never called with (self), so nothing appears to be
    # executed or compared here -- confirm whether that is intentional.
    T()
    T({'add_duplicates':False})
    T({'force_id':1000})
def test_legacy_adding_books(self): # {{{
    'Test various adding books methods'
    from calibre.ebooks.metadata.book.base import Metadata
    # Two views over the same cloned library: `legacy` from init_legacy and
    # `old` from init_old. The ET objects below receive both.
    legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
    mi = Metadata('Added Book0', authors=('Added Author',))
    with NamedTemporaryFile(suffix='.aff') as f:
        f.write(b'xxx')
        # Flush before handing f.name to the database APIs, which receive
        # only the path.
        f.flush()
        # ET is defined elsewhere; presumably calling an ET instance with
        # (self) runs the named method on both `old` and `legacy` and
        # compares the results -- TODO confirm against ET's definition.
        T = partial(ET, 'add_books', ([f.name], ['AFF'], [mi]), old=old, legacy=legacy)
        T()(self)
        book_id = T(kwargs={'return_ids':True})(self)[1][0]
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
        T(kwargs={'add_duplicates':False})(self)
        mi.title = 'Added Book1'
        mi.uuid = 'uuu'
        T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
        # Without preserve_uuid the two backends are expected to end up with
        # different uuids for the new book; with it they must agree.
        book_id = T()(self)
        self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        book_id = T(kwargs={'preserve_uuid':True})(self)
        self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
        # add_format is exercised three times on the same target: twice with
        # default arguments and once with an explicit replace=True.
        T = partial(ET, 'add_format', old=old, legacy=legacy)
        T((0, 'AFF', BytesIO(b'fffff')))(self)
        T((0, 'AFF', BytesIO(b'fffff')))(self)
        T((0, 'AFF', BytesIO(b'fffff')), {'replace':True})(self)
    with NamedTemporaryFile(suffix='.opf') as f:
        f.write(b'zzzz')
        f.flush()
        T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
        book_id = T()(self)
        # Importing only an .opf file must leave the book without formats.
        self.assertFalse(legacy.new_api.formats(book_id))
    mi.title = 'Added Book2'
    T = partial(ET, 'create_book_entry', (mi,), old=old, legacy=legacy)
    # NOTE(review): unlike every call above, these three ET objects are
    # constructed but never called with (self), so nothing appears to be
    # executed or compared here -- confirm whether that is intentional.
    T()
    T({'add_duplicates':False})
    T({'force_id':1000})
    with NamedTemporaryFile(suffix='.txt') as f:
        f.write(b'tttttt')
        f.seek(0)
        # Catalogs: both backends must report the same book id for a given
        # title, and re-adding 'My Catalog' must return that same id, while
        # a different title must yield a larger id.
        bid = legacy.add_catalog(f.name, 'My Catalog')
        self.assertEqual(old.add_catalog(f.name, 'My Catalog'), bid)
        cache = legacy.new_api
        self.assertEqual(cache.formats(bid), ('TXT',))
        self.assertEqual(cache.field_for('title', bid), 'My Catalog')
        self.assertEqual(cache.field_for('authors', bid), ('calibre',))
        self.assertEqual(cache.field_for('tags', bid), (_('Catalog'),))
        self.assertTrue(bid < legacy.add_catalog(f.name, 'Something else'))
        self.assertEqual(legacy.add_catalog(f.name, 'My Catalog'), bid)
        self.assertEqual(old.add_catalog(f.name, 'My Catalog'), bid)
        # News: the resulting tags are expected to be the translated 'News'
        # tag, then the title tag, then the custom tags.
        bid = legacy.add_news(f.name, {'title':'Events', 'add_title_tag':True, 'custom_tags':('one', 'two')})
        self.assertEqual(cache.formats(bid), ('TXT',))
        self.assertEqual(cache.field_for('authors', bid), ('calibre',))
        self.assertEqual(cache.field_for('tags', bid), (_('News'), 'Events', 'one', 'two'))
    old.close()
def read_metadata(root):
    """Build a Metadata object from the parsed package document *root*.

    Starts from a Metadata whose title and author are the translated
    'Unknown' and overwrites each field only when the matching ``read_*``
    helper returns a usable value. Identifiers keyed 'calibre' become the
    application id, 'uuid' identifiers are dropped, and every other scheme
    contributes its first value.
    """
    mi = Metadata(_('Unknown'), [_('Unknown')])
    prefixes = read_prefixes(root)
    refines = read_refines(root)
    ctx = (root, prefixes, refines)

    plain_ids = {}
    for scheme, values in read_identifiers(*ctx).iteritems():
        if scheme == 'calibre':
            mi.application_id = values[0]
        elif scheme != 'uuid':
            plain_ids[scheme] = values[0]
    mi.set_identifiers(plain_ids)

    # Fields that keep their current value when the reader yields nothing.
    for field, reader in (
            ('title', read_title),
            ('title_sort', read_title_sort),
            ('languages', read_languages)):
        setattr(mi, field, reader(*ctx) or getattr(mi, field))

    names, sorts = [], []
    for author in read_authors(*ctx):
        names.append(author.name)
        sorts.append(author.sort)
    mi.authors = names or mi.authors
    mi.author_sort = authors_to_string(sorts) or mi.author_sort

    producers = read_book_producers(*ctx)
    if producers:
        mi.book_producer = producers[0]

    # Date fields are only set when the reader produced a defined date.
    for field, reader in (
            ('pubdate', read_pubdate),
            ('timestamp', read_timestamp),
            ('last_modified', read_last_modified)):
        when = reader(*ctx)
        if not is_date_undefined(when):
            setattr(mi, field, when)
    return mi
def test(scale=0.25):
    """Show a grid of generated covers: one row per color theme, one column per style.

    *scale* is multiplied by the widget's device pixel ratio before being
    passed to ``scale_cover``. Blocks in the Qt event loop until it exits.
    """
    from PyQt5.Qt import QLabel, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    from calibre.gui2 import Application
    app = Application([])

    mi = Metadata('Unknown', ['Kovid Goyal', 'John & Doe', 'Author'])
    mi.series = 'A series & styles'

    window = QMainWindow()
    scroll_area = QScrollArea(window)
    container = QWidget(window)
    scroll_area.setWidget(container)
    grid = QGridLayout(container)
    container.setLayout(grid)
    grid.setSpacing(30)

    scale *= container.devicePixelRatioF()
    labels = []  # every QLabel created below is also appended here
    for row, color in enumerate(sorted(default_color_themes)):
        for col, style in enumerate(sorted(all_styles())):
            mi.series_index = col + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            scale_cover(prefs, scale)
            image = generate_cover(mi, prefs=prefs, as_qimage=True)
            image.setDevicePixelRatio(container.devicePixelRatioF())
            label = QLabel()
            label.setPixmap(QPixmap.fromImage(image))
            grid.addWidget(label, row, col)
            labels.append(label)

    window.setCentralWidget(scroll_area)
    container.resize(container.sizeHint())
    window.show()
    app.exec_()
def add_catalog(cache, path, title):
    """Add the catalog file at *path* to *cache* under *title*.

    If a book with exactly this title and the translated 'Catalog' tag is
    already found by ``cache._search``, its metadata is overwritten and the
    file is added to it; otherwise a new book entry is created. Everything
    runs while holding ``cache.write_lock``.

    :param cache: object providing ``write_lock``, ``_search``,
        ``_create_book_entry``, ``_set_metadata`` and ``_add_format``
    :param path: path of the catalog file; its extension selects the format
    :param title: title to store for the catalog book
    :return: the id of the created or updated book
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    with lopen(path, 'rb') as stream, cache.write_lock:
        matches = cache._search('title:="%s" and tags:="%s"' % (title.replace('"', '\\"'), _('Catalog')), None)
        db_id = next(iter(matches)) if matches else None
        try:
            mi = get_metadata(stream, fmt)
            mi.authors = ['calibre']
        except Exception:
            # Best effort: fall back to minimal metadata when the file cannot
            # be parsed. ``Exception`` (rather than a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            mi = Metadata(title, ['calibre'])
        mi.title, mi.authors = title, ['calibre']
        mi.tags = [_('Catalog')]
        mi.pubdate = mi.timestamp = utcnow()
        if fmt == 'mobi':
            # Drop any cover read from a MOBI file.
            mi.cover, mi.cover_data = None, (None, None)
        if db_id is None:
            db_id = cache._create_book_entry(mi, apply_import_tags=False)
        else:
            cache._set_metadata(db_id, mi)
        cache._add_format(db_id, fmt, stream)
    return db_id
def test(scale=0.5):
    """Show a grid of generated covers: one row per color theme, one column per style.

    The cover dimensions and font sizes in the preferences are multiplied by
    *scale* (truncated to int) before each cover is generated. Blocks in the
    Qt event loop until it exits.
    """
    from PyQt5.Qt import QLabel, QApplication, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    app = QApplication([])

    mi = Metadata('xxx', ['Kovid Goyal', 'John Q. Doe', 'Author'])
    mi.series = 'A series of styles'

    window = QMainWindow()
    scroll_area = QScrollArea(window)
    container = QWidget(window)
    scroll_area.setWidget(container)
    grid = QGridLayout(container)
    container.setLayout(grid)
    grid.setSpacing(30)

    scaled_keys = ('cover_width', 'cover_height', 'title_font_size', 'subtitle_font_size', 'footer_font_size')
    labels = []  # every QLabel created below is also appended here
    for row, color in enumerate(sorted(default_color_themes)):
        for col, style in enumerate(sorted(all_styles())):
            mi.series_index = col + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            for key in scaled_keys:
                prefs[key] = int(scale * prefs[key])
            image = generate_cover(mi, prefs=prefs, as_qimage=True)
            label = QLabel()
            label.setPixmap(QPixmap.fromImage(image))
            grid.addWidget(label, row, col)
            labels.append(label)

    window.setCentralWidget(scroll_area)
    container.resize(container.sizeHint())
    window.show()
    app.exec_()
def add_catalog(cache, path, title):
    """Add the catalog file at *path* to *cache* under *title*.

    If a book with exactly this title and the translated 'Catalog' tag is
    already found by ``cache._search``, its metadata is overwritten and the
    file is added to it; otherwise a new book entry is created. The lookup
    and metadata update happen under ``cache.write_lock``; the format itself
    is added after the lock has been released.

    :param cache: object providing ``write_lock``, ``_search``,
        ``_create_book_entry``, ``_set_metadata`` and ``add_format``
    :param path: path of the catalog file; its extension selects the format
    :param title: title to store for the catalog book
    :return: the id of the created or updated book
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    with lopen(path, 'rb') as stream:
        with cache.write_lock:
            matches = cache._search(
                'title:="%s" and tags:="%s"' % (title.replace('"', '\\"'), _('Catalog')), None)
            db_id = next(iter(matches)) if matches else None
            try:
                mi = get_metadata(stream, fmt)
                mi.authors = ['calibre']
            except Exception:
                # Best effort: fall back to minimal metadata when the file
                # cannot be parsed. ``Exception`` (rather than a bare except)
                # lets KeyboardInterrupt/SystemExit propagate.
                mi = Metadata(title, ['calibre'])
            mi.title, mi.authors = title, ['calibre']
            mi.tags = [_('Catalog')]
            mi.pubdate = mi.timestamp = utcnow()
            if fmt == 'mobi':
                # Drop any cover read from a MOBI file.
                mi.cover, mi.cover_data = None, (None, None)
            if db_id is None:
                db_id = cache._create_book_entry(mi, apply_import_tags=False)
            else:
                cache._set_metadata(db_id, mi)
        # Can't keep the write lock since post-import hooks might run
        cache.add_format(db_id, fmt, stream)
    return db_id
def add_catalog(cache, path, title, dbapi=None):
    """Add the catalog file at *path* to *cache* under *title*.

    If a book with exactly this title and the translated 'Catalog' tag is
    already found by ``cache._search``, its metadata is overwritten and the
    file is added to it; otherwise a new book entry is created. The lookup
    and metadata update happen under ``cache.write_lock``; the format itself
    is added after the lock has been released.

    :param cache: object providing ``write_lock``, ``_search``,
        ``_create_book_entry``, ``_set_metadata`` and ``add_format``
    :param path: path of the catalog file; its extension selects the format
    :param title: title to store for the catalog book
    :param dbapi: passed through unchanged to ``cache.add_format``
    :return: ``(db_id, new_book_added)`` where *new_book_added* is True only
        when a new book entry was created
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    new_book_added = False
    with lopen(path, 'rb') as stream:
        with cache.write_lock:
            matches = cache._search('title:="%s" and tags:="%s"' % (title.replace('"', '\\"'), _('Catalog')), None)
            db_id = next(iter(matches)) if matches else None
            try:
                mi = get_metadata(stream, fmt)
                mi.authors = ['calibre']
            except Exception:
                # Best effort: fall back to minimal metadata when the file
                # cannot be parsed. ``Exception`` (rather than a bare except)
                # lets KeyboardInterrupt/SystemExit propagate.
                mi = Metadata(title, ['calibre'])
            mi.title, mi.authors = title, ['calibre']
            mi.author_sort = 'calibre'  # The MOBI/AZW3 format sets author sort to date
            mi.tags = [_('Catalog')]
            mi.pubdate = mi.timestamp = utcnow()
            if fmt == 'mobi':
                # Drop any cover read from a MOBI file.
                mi.cover, mi.cover_data = None, (None, None)
            if db_id is None:
                db_id = cache._create_book_entry(mi, apply_import_tags=False)
                new_book_added = True
            else:
                cache._set_metadata(db_id, mi)
        # Can't keep the write lock since post-import hooks might run
        cache.add_format(db_id, fmt, stream, dbapi=dbapi)
    return db_id, new_book_added
def read_metadata(root, ver=None, return_extra_data=False):
    """Build a Metadata object from the parsed package document *root*.

    Starts from a Metadata whose title and author are the translated
    'Unknown' and overwrites each field only when the matching ``read_*``
    helper returns a usable value. Identifiers keyed 'calibre' become the
    application id, 'uuid' becomes the uuid, and every other scheme
    contributes its first value.

    When *return_extra_data* is true the return value is the tuple
    ``(metadata, ver, raster_cover, first_spine_item)``; otherwise only the
    Metadata object is returned. *ver* is not used except to be echoed back
    in that tuple.
    """
    mi = Metadata(_('Unknown'), [_('Unknown')])
    prefixes = read_prefixes(root)
    refines = read_refines(root)
    ctx = (root, prefixes, refines)

    other_ids = {}
    for scheme, values in read_identifiers(*ctx).iteritems():
        if scheme == 'calibre':
            mi.application_id = values[0]
        elif scheme == 'uuid':
            mi.uuid = values[0]
        else:
            other_ids[scheme] = values[0]
    mi.set_identifiers(other_ids)

    # Fields that keep their current value when the reader yields nothing.
    for field, reader in (
            ('title', read_title),
            ('title_sort', read_title_sort),
            ('languages', read_languages)):
        setattr(mi, field, reader(*ctx) or getattr(mi, field))

    names, sorts = [], []
    for author in read_authors(*ctx):
        names.append(author.name)
        sorts.append(author.sort)
    mi.authors = names or mi.authors
    mi.author_sort = authors_to_string(sorts) or mi.author_sort

    producers = read_book_producers(*ctx)
    if producers and producers[0]:
        mi.book_producer = producers[0]

    # Date fields are only set when the reader produced a defined date.
    for field, reader in (
            ('pubdate', read_pubdate),
            ('timestamp', read_timestamp),
            ('last_modified', read_last_modified)):
        when = reader(*ctx)
        if not is_date_undefined(when):
            setattr(mi, field, when)

    for field, reader in (
            ('comments', read_comments),
            ('publisher', read_publisher),
            ('tags', read_tags),
            ('rating', read_rating)):
        setattr(mi, field, reader(*ctx) or getattr(mi, field))

    series, series_index = read_series(*ctx)
    if series:
        mi.series = series
        mi.series_index = series_index

    for field, reader in (
            ('author_link_map', read_author_link_map),
            ('user_categories', read_user_categories)):
        setattr(mi, field, reader(*ctx) or getattr(mi, field))

    user_meta = read_user_metadata(*ctx) or {}
    for name, fm in user_meta.iteritems():
        mi.set_user_metadata(name, fm)

    if return_extra_data:
        return mi, ver, read_raster_cover(*ctx), first_spine_item(*ctx)
    return mi
def read_metadata(root):
    """Build a Metadata object from the parsed package document *root*.

    Only identifiers, title, title sort and languages are read. Identifiers
    keyed 'calibre' become the application id, 'uuid' identifiers are
    dropped, and every other scheme contributes its first value. Title,
    title sort and languages keep the defaults of a Metadata created with
    the translated 'Unknown' when the readers return nothing.
    """
    mi = Metadata(_('Unknown'), [_('Unknown')])
    prefixes = read_prefixes(root)
    refines = read_refines(root)
    ctx = (root, prefixes, refines)

    plain_ids = {}
    for scheme, values in read_identifiers(*ctx).iteritems():
        if scheme == 'calibre':
            mi.application_id = values[0]
        elif scheme != 'uuid':
            plain_ids[scheme] = values[0]
    mi.set_identifiers(plain_ids)

    for field, reader in (
            ('title', read_title),
            ('title_sort', read_title_sort),
            ('languages', read_languages)):
        setattr(mi, field, reader(*ctx) or getattr(mi, field))
    return mi
def data2mi(self, item):
    """Convert one metadata answer (a dict) into a Metadata object.

    Missing keys are skipped. When both 'updated' and 'year' are present the
    publication date comes from 'year'; when both 'series' and 'journal' are
    present the series comes from 'journal', because the later assignment
    wins in each pair.
    """
    mi = Metadata(_('Unknown'))

    # Regular metadata
    mi.title = item.get('title', None)
    mi.authors = item.get('authors', [])
    mi.publisher = item.get('publisher', None)

    if 'id' in item:
        mi.set_identifier(self.idkey, item['id'])
    for scheme in ('doi', 'isbn'):
        if scheme in item:
            mi.set_identifier(scheme, item[scheme])

    if 'updated' in item:
        mi.pubdate = parse_date(item['updated'], assume_utc=True)

    if 'series' in item:
        mi.series = item['series']
        mi.series_index = self.format_series_index(
            item.get('series_index'), None)

    if 'year' in item:
        mi.pubdate = parse_date(item['year'], assume_utc=True)

    if 'abstract' in item:
        mi.comments = self.format_abstract(item['abstract'])

    if 'language' in item:
        mi.language = item['language']

    if 'journal' in item:
        mi.series = item['journal']
        volume, number = item.get('volume'), item.get('number')
        mi.series_index = self.format_series_index(volume, number)

    if 'subject' in item:
        # Each subject may yield several tags from both classifiers;
        # duplicates collapse in the set and the result is sorted.
        collected = set()
        for subject in item['subject']:
            collected.update(msc_tags(subject))
            collected.update(arxiv_tags(subject))
        mi.tags = sorted(collected)

    return mi
def data2mi(self, item):
    """Convert one metadata answer (a dict) into a Metadata object.

    Keys absent from *item* are skipped. Assignments happen in a fixed
    order, so 'year' overrides the publication date from 'updated' and
    'journal' overrides the series from 'series' when both are present.
    """
    mi = Metadata(_('Unknown'))

    # Regular metadata
    mi.title = item.get('title', None)
    mi.authors = item.get('authors', [])
    mi.publisher = item.get('publisher', None)

    def has(key):
        return key in item

    if has('id'):
        mi.set_identifier(self.idkey, item['id'])
    if has('doi'):
        mi.set_identifier('doi', item['doi'])
    if has('isbn'):
        mi.set_identifier('isbn', item['isbn'])
    if has('updated'):
        mi.pubdate = parse_date(item['updated'], assume_utc=True)
    if has('series'):
        mi.series = item['series']
        mi.series_index = self.format_series_index(item.get('series_index'), None)
    if has('year'):
        mi.pubdate = parse_date(item['year'], assume_utc=True)
    if has('abstract'):
        mi.comments = self.format_abstract(item['abstract'])
    if has('language'):
        mi.language = item['language']
    if has('journal'):
        mi.series = item['journal']
        mi.series_index = self.format_series_index(item.get('volume'), item.get('number'))
    if has('subject'):
        # Union of the tags both classifiers produce for every subject,
        # stored as a sorted list.
        tag_set = set()
        for subject in item['subject']:
            tag_set.update(msc_tags(subject))
            tag_set.update(arxiv_tags(subject))
        mi.tags = sorted(tag_set)
    return mi
def metadata_from_xmp_packet(raw_bytes):
    """Parse an XMP packet and return the metadata found in it.

    :param raw_bytes: the raw XMP packet, passed to ``parse_xmp_packet``
    :return: a Metadata object titled with the translated "Unknown" unless
        the packet supplies a title; every other field is set only when the
        packet provides a usable value

    Unparseable dates and unparseable JSON in the calibre-specific
    properties are ignored rather than raised.
    """
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_("Unknown"))

    title = first_alt("//dc:title", root)
    if title:
        mi.title = title
    authors = multiple_sequences("//dc:creator", root)
    if authors:
        mi.authors = authors
    tags = multiple_sequences("//dc:subject", root) or multiple_sequences("//pdf:Keywords", root)
    if tags:
        mi.tags = tags
    comments = first_alt("//dc:description", root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences("//dc:publisher", root)
    if publishers:
        mi.publisher = publishers[0]

    try:
        pubdate = parse_date(
            first_sequence("//dc:date", root) or first_simple("//xmp:CreateDate", root), assume_utc=False
        )
    except Exception:
        # Best effort: a missing or malformed date leaves pubdate unset.
        pass
    else:
        mi.pubdate = pubdate

    bkp = first_simple("//xmp:CreatorTool", root)
    if bkp:
        mi.book_producer = bkp

    md = first_simple("//xmp:MetadataDate", root)
    if md:
        try:
            mi.metadata_date = parse_date(md)
        except Exception:
            pass

    rating = first_simple("//calibre:rating", root)
    if rating is not None:
        try:
            rating = float(rating)
            # Only ratings inside the inclusive 0..10 range are accepted.
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass

    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index

    for x in ("title_sort", "author_sort"):
        # Use the first element that yields a non-empty value.
        for elem in XPath("//calibre:" + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break

    for x in ("author_link_map", "user_categories"):
        val = first_simple("//calibre:" + x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except Exception:
                pass

    languages = multiple_sequences("//dc:language", root)
    if languages:
        # A real list, so the emptiness test below is meaningful even where
        # filter() returns a lazy (always truthy) object.
        languages = [lang for lang in map(canonicalize_lang, languages) if lang]
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath("//xmp:Identifier")(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ("prism", "pdfx"):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple("//%s:%s" % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == "isbn":
                    val = check_isbn(val)
                elif scheme == "doi":
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in {"doi": check_doi, "isbn": check_isbn}.iteritems():
        if scheme not in identifiers:
            val = check_func(first_simple("//dc:identifier", root))
            if val:
                # Store under the scheme that validated the value; previously
                # a valid ISBN was filed under the 'doi' key.
                identifiers[scheme] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)
    return mi
def _get_metadata(self, book_id, get_user_categories=True):  # {{{
    """Assemble a Metadata object for *book_id* from this object's fields.

    Standard fields are read through ``self._field_for``; custom columns are
    copied afterwards, with composite columns evaluated last so they can see
    the already populated object. When *get_user_categories* is true, the
    'user_categories' preference is resolved against the book's values;
    otherwise ``user_categories`` is set to an empty dict.
    """
    mi = Metadata(None, template_cache=self.formatter_template_cache)

    author_ids = self._field_ids_for('authors', book_id)
    records = [self._author_data(aid) for aid in author_ids]
    author_names = []
    sort_by_author = {}
    link_by_author = {}
    for record in records:
        author = record['name']
        author_names.append(author)
        sort_by_author[author] = record['sort']
        link_by_author[author] = record['link']

    mi.title = self._field_for('title', book_id, default_value=_('Unknown'))
    mi.authors = author_names
    mi.author_sort = self._field_for('author_sort', book_id, default_value=_('Unknown'))
    mi.author_sort_map = sort_by_author
    mi.author_link_map = link_by_author
    mi.comments = self._field_for('comments', book_id)
    mi.publisher = self._field_for('publisher', book_id)

    # One "now" value shared by every date field that has no stored value.
    now = nowf()
    mi.timestamp = self._field_for('timestamp', book_id, default_value=now)
    mi.pubdate = self._field_for('pubdate', book_id, default_value=now)
    mi.uuid = self._field_for('uuid', book_id, default_value='dummy')
    mi.title_sort = self._field_for('sort', book_id, default_value=_('Unknown'))
    mi.book_size = self._field_for('size', book_id, default_value=0)
    mi.ondevice_col = self._field_for('ondevice', book_id, default_value='')
    mi.last_modified = self._field_for('last_modified', book_id, default_value=now)

    formats = self._field_for('formats', book_id)
    mi.format_metadata = {}
    mi.languages = list(self._field_for('languages', book_id))
    if formats:
        mi.format_metadata = FormatMetadata(self, book_id, formats)
        mi.formats = FormatsList(formats, mi.format_metadata)
    else:
        mi.formats = None

    if self._field_for('cover', book_id, default_value=False):
        mi.has_cover = _('Yes')
    else:
        mi.has_cover = ''
    mi.tags = list(self._field_for('tags', book_id, default_value=()))
    mi.series = self._field_for('series', book_id)
    if mi.series:
        mi.series_index = self._field_for('series_index', book_id, default_value=1.0)
    mi.rating = self._field_for('rating', book_id)
    mi.set_identifiers(
        self._field_for('identifiers', book_id, default_value={}))
    mi.application_id = book_id
    mi.id = book_id

    # Custom columns: plain ones are copied now, composites are deferred.
    deferred = []
    for key, meta in self.field_metadata.custom_iteritems():
        mi.set_user_metadata(key, meta)
        if meta['datatype'] == 'composite':
            deferred.append(key)
            continue
        value = self._field_for(key, book_id)
        if isinstance(value, tuple):
            value = list(value)
        extra = self._field_for(key + '_index', book_id)
        mi.set(key, val=value, extra=extra)
    for key in deferred:
        mi.set(key, val=self._composite_for(key, book_id, mi))

    resolved = {}
    if get_user_categories:
        user_cats = self.backend.prefs['user_categories']
        for ucat in user_cats:
            hits = []
            for name, cat, ign in user_cats[ucat]:
                current = mi.get(cat, None)
                # List-valued fields match by membership, others by equality.
                if isinstance(current, list):
                    matched = name in current
                else:
                    matched = name == current
                if matched:
                    hits.append([name, cat])
            resolved[ucat] = hits
    mi.user_categories = resolved

    return mi
def test_legacy_adding_books(self): # {{{
    "Test various adding/deleting books methods"
    from calibre.ebooks.metadata.book.base import Metadata
    # Two views over the same cloned library: `legacy` from init_legacy and
    # `old` from init_old. The ET objects below receive both.
    legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
    mi = Metadata("Added Book0", authors=("Added Author",))
    with NamedTemporaryFile(suffix=".aff") as f:
        f.write(b"xxx")
        # Flush before handing f.name to the database APIs, which receive
        # only the path.
        f.flush()
        # ET is defined elsewhere; presumably calling an ET instance with
        # (self) runs the named method on both `old` and `legacy` and
        # compares the results -- TODO confirm against ET's definition.
        T = partial(ET, "add_books", ([f.name], ["AFF"], [mi]), old=old, legacy=legacy)
        T()(self)
        book_id = T(kwargs={"return_ids": True})(self)[1][0]
        self.assertEqual(legacy.new_api.formats(book_id), ("AFF",))
        T(kwargs={"add_duplicates": False})(self)
        mi.title = "Added Book1"
        mi.uuid = "uuu"
        T = partial(ET, "import_book", (mi, [f.name]), old=old, legacy=legacy)
        # Without preserve_uuid the two backends are expected to end up with
        # different uuids for the new book; with it they must agree.
        book_id = T()(self)
        self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        book_id = T(kwargs={"preserve_uuid": True})(self)
        self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        self.assertEqual(legacy.new_api.formats(book_id), ("AFF",))
        # add_format is exercised three times on the same target: twice with
        # default arguments and once with an explicit replace=True.
        T = partial(ET, "add_format", old=old, legacy=legacy)
        T((0, "AFF", BytesIO(b"fffff")))(self)
        T((0, "AFF", BytesIO(b"fffff")))(self)
        T((0, "AFF", BytesIO(b"fffff")), {"replace": True})(self)
    with NamedTemporaryFile(suffix=".opf") as f:
        f.write(b"zzzz")
        f.flush()
        T = partial(ET, "import_book", (mi, [f.name]), old=old, legacy=legacy)
        book_id = T()(self)
        # Importing only an .opf file must leave the book without formats.
        self.assertFalse(legacy.new_api.formats(book_id))
    mi.title = "Added Book2"
    T = partial(ET, "create_book_entry", (mi,), old=old, legacy=legacy)
    # NOTE(review): unlike every call above, these three ET objects are
    # constructed but never called with (self), so nothing appears to be
    # executed or compared here -- confirm whether that is intentional.
    T()
    T({"add_duplicates": False})
    T({"force_id": 1000})
    with NamedTemporaryFile(suffix=".txt") as f:
        f.write(b"tttttt")
        f.seek(0)
        # Catalogs: both backends must report the same book id for a given
        # title, and re-adding "My Catalog" must return that same id, while
        # a different title must yield a larger id.
        bid = legacy.add_catalog(f.name, "My Catalog")
        self.assertEqual(old.add_catalog(f.name, "My Catalog"), bid)
        cache = legacy.new_api
        self.assertEqual(cache.formats(bid), ("TXT",))
        self.assertEqual(cache.field_for("title", bid), "My Catalog")
        self.assertEqual(cache.field_for("authors", bid), ("calibre",))
        self.assertEqual(cache.field_for("tags", bid), (_("Catalog"),))
        self.assertTrue(bid < legacy.add_catalog(f.name, "Something else"))
        self.assertEqual(legacy.add_catalog(f.name, "My Catalog"), bid)
        self.assertEqual(old.add_catalog(f.name, "My Catalog"), bid)
        # News: the resulting tags are expected to be the translated "News"
        # tag, then the title tag, then the custom tags.
        bid = legacy.add_news(f.name, {"title": "Events", "add_title_tag": True, "custom_tags": ("one", "two")})
        self.assertEqual(cache.formats(bid), ("TXT",))
        self.assertEqual(cache.field_for("authors", bid), ("calibre",))
        self.assertEqual(cache.field_for("tags", bid), (_("News"), "Events", "one", "two"))
    # Covers: book 1 must start with a cover; after removing it, has_cover
    # and cover() must both be falsy. The saved bytes are then set on book 3.
    self.assertTrue(legacy.cover(1, index_is_id=True))
    origcov = legacy.cover(1, index_is_id=True)
    self.assertTrue(legacy.has_cover(1))
    legacy.remove_cover(1)
    self.assertFalse(legacy.has_cover(1))
    self.assertFalse(legacy.cover(1, index_is_id=True))
    legacy.set_cover(3, origcov)
    self.assertEqual(legacy.cover(3, index_is_id=True), origcov)
    self.assertTrue(legacy.has_cover(3))
    # Formats: FMT1 of book 1 must exist and read as None once removed.
    self.assertTrue(legacy.format(1, "FMT1", index_is_id=True))
    legacy.remove_format(1, "FMT1", index_is_id=True)
    self.assertIsNone(legacy.format(1, "FMT1", index_is_id=True))
    # Deleting: the book is deleted from both backends, but only `legacy`
    # is checked afterwards.
    legacy.delete_book(1)
    old.delete_book(1)
    self.assertNotIn(1, legacy.all_ids())
    legacy.dump_metadata((2, 3))
    old.close()
def merge(self, results, min_year, do_asr=True):
    """Combine several metadata *results* into a single Metadata object.

    :param results: metadata objects to merge
    :param min_year: when truthy, the publication year to force; otherwise
        the earliest publication date among *results* is used
    :param do_asr: when true, also store the mean ``relevance_in_source`` of
        *results* as ``average_source_relevance``
    :return: the merged Metadata object
    """
    merged = Metadata(_('Unknown'))

    # Shortest title and publisher are assumed to carry the least cruft.
    merged.title = self.length_merge('title', results, null_value=merged.title)
    # Extra authors do no harm (could be an editor or translator), so take
    # the longest list.
    merged.authors = self.length_merge(
        'authors', results, null_value=merged.authors, shortest=False)
    merged.publisher = self.length_merge(
        'publisher', results, null_value=merged.publisher)
    # Whether the smaller or larger tag set wins is a user preference.
    merged.tags = self.length_merge(
        'tags', results, null_value=merged.tags, shortest=msprefs['fewer_tags'])

    # Longest series name is assumed to be the most informative; the index
    # comes from the first result that has exactly that series.
    merged.series = self.length_merge(
        'series', results, null_value=merged.series, shortest=False)
    for result in results:
        if result.series and result.series == merged.series:
            merged.series_index = result.series_index
            break

    # Average the ratings that lie in (0, 5].
    ratings = []
    for result in results:
        value = result.rating
        if value and 0 < value <= 5:
            ratings.append(value)
    if ratings:
        merged.rating = int(round(sum(ratings)/len(ratings)))

    # Shortest language is assumed most likely to be valid.
    merged.language = self.length_merge(
        'language', results, null_value=merged.language)

    # Longest comments win.
    merged.comments = self.length_merge(
        'comments', results, null_value=merged.comments, shortest=False)

    # Published date
    if min_year:
        # Prefer the month/day of the first result published in min_year,
        # otherwise fall back to January 2nd of that year.
        for result in results:
            if getattr(result.pubdate, 'year', None) == min_year:
                merged.pubdate = result.pubdate
                break
        if getattr(merged.pubdate, 'year', None) == min_year:
            month, day = merged.pubdate.month, merged.pubdate.day
        else:
            month, day = 1, 2
        merged.pubdate = datetime(min_year, month, day, tzinfo=utc_tz)
    else:
        # Year 3001 acts as a "nothing found" sentinel.
        earliest = datetime(3001, 1, 1, tzinfo=utc_tz)
        for result in results:
            if result.pubdate is None:
                continue
            candidate = as_utc(result.pubdate)
            if candidate < earliest:
                earliest = candidate
        if earliest.year < 3000:
            merged.pubdate = earliest

    # Identifiers: later results overwrite earlier ones per scheme.
    for result in results:
        merged.identifiers.update(result.identifiers)

    # Cover URL
    merged.has_cached_cover_url = any(
        getattr(result, 'has_cached_cover_url', False) for result in results)

    # Any other field touched by a plugin and still null gets a random merge.
    touched_fields = set()
    for result in results:
        if hasattr(result, 'identify_plugin'):
            touched_fields |= result.identify_plugin.touched_fields

    for field in touched_fields:
        if not field.startswith('identifier:') and merged.is_null(field):
            setattr(merged, field, self.random_merge(
                field, results, null_value=getattr(merged, field)))

    if do_asr:
        relevances = [x.relevance_in_source for x in results]
        merged.average_source_relevance = sum(relevances)/len(relevances)

    return merged
def _get_metadata(self, book_id, get_user_categories=True):  # {{{
    """Assemble a Metadata object for *book_id* from this object's fields.

    Standard fields are read through ``self._field_for``; custom columns are
    copied afterwards, with composite columns evaluated last so they can see
    the already populated object. When *get_user_categories* is true, the
    'user_categories' preference is resolved against the book's values;
    otherwise ``user_categories`` is set to an empty dict.
    """
    mi = Metadata(None, template_cache=self.formatter_template_cache)

    author_ids = self._field_ids_for('authors', book_id)
    records = [self._author_data(aid) for aid in author_ids]
    author_names = []
    sort_by_author = {}
    link_by_author = {}
    for record in records:
        author = record['name']
        author_names.append(author)
        sort_by_author[author] = record['sort']
        link_by_author[author] = record['link']

    mi.title = self._field_for('title', book_id, default_value=_('Unknown'))
    mi.authors = author_names
    mi.author_sort = self._field_for('author_sort', book_id, default_value=_('Unknown'))
    mi.author_sort_map = sort_by_author
    mi.author_link_map = link_by_author
    mi.comments = self._field_for('comments', book_id)
    mi.publisher = self._field_for('publisher', book_id)

    # One "now" value shared by every date field that has no stored value.
    now = nowf()
    mi.timestamp = self._field_for('timestamp', book_id, default_value=now)
    mi.pubdate = self._field_for('pubdate', book_id, default_value=now)
    mi.uuid = self._field_for('uuid', book_id, default_value='dummy')
    mi.title_sort = self._field_for('sort', book_id, default_value=_('Unknown'))
    mi.book_size = self._field_for('size', book_id, default_value=0)
    mi.ondevice_col = self._field_for('ondevice', book_id, default_value='')
    mi.last_modified = self._field_for('last_modified', book_id, default_value=now)

    formats = self._field_for('formats', book_id)
    mi.format_metadata = {}
    mi.languages = list(self._field_for('languages', book_id))
    if formats:
        mi.format_metadata = FormatMetadata(self, book_id, formats)
        mi.formats = FormatsList(formats, mi.format_metadata)
    else:
        mi.formats = None

    if self._field_for('cover', book_id, default_value=False):
        mi.has_cover = _('Yes')
    else:
        mi.has_cover = ''
    mi.tags = list(self._field_for('tags', book_id, default_value=()))
    mi.series = self._field_for('series', book_id)
    if mi.series:
        mi.series_index = self._field_for('series_index', book_id, default_value=1.0)
    mi.rating = self._field_for('rating', book_id)
    mi.set_identifiers(self._field_for('identifiers', book_id, default_value={}))
    mi.application_id = book_id
    mi.id = book_id

    # Custom columns: plain ones are copied now, composites are deferred.
    deferred = []
    for key, meta in self.field_metadata.custom_iteritems():
        mi.set_user_metadata(key, meta)
        if meta['datatype'] == 'composite':
            deferred.append(key)
            continue
        value = self._field_for(key, book_id)
        if isinstance(value, tuple):
            value = list(value)
        extra = self._field_for(key+'_index', book_id)
        mi.set(key, val=value, extra=extra)
    for key in deferred:
        mi.set(key, val=self._composite_for(key, book_id, mi))

    resolved = {}
    if get_user_categories:
        user_cats = self.backend.prefs['user_categories']
        for ucat in user_cats:
            hits = []
            for name, cat, ign in user_cats[ucat]:
                current = mi.get(cat, None)
                # List-valued fields match by membership, others by equality.
                if isinstance(current, list):
                    matched = name in current
                else:
                    matched = name == current
                if matched:
                    hits.append([name, cat])
            resolved[ucat] = hits
    mi.user_categories = resolved

    return mi
def metadata_from_xmp_packet(raw_bytes):
    """Parse an XMP packet and return the metadata found in it.

    :param raw_bytes: the raw XMP packet, passed to ``parse_xmp_packet``
    :return: a Metadata object titled with the translated 'Unknown' unless
        the packet supplies a title; every other field is set only when the
        packet provides a usable value
    :raises ValueError: when the title starts with the literal text
        ``\\376\\377``, which marks a corrupted packet

    Unparseable publication dates and unparseable JSON in the
    calibre-specific properties are ignored rather than raised.
    """
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))

    title = first_alt('//dc:title', root)
    # Check for a title before calling startswith(): a packet without
    # dc:title must not fail on the corruption check.
    if title:
        if title.startswith(r'\376\377'):
            # corrupted XMP packet generated by Nitro PDF. See
            # https://bugs.launchpad.net/calibre/+bug/1541981
            raise ValueError('Corrupted XMP metadata packet detected, probably generated by Nitro PDF')
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = authors
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences('//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]

    try:
        pubdate = parse_date(first_sequence('//dc:date', root) or first_simple('//xmp:CreateDate', root), assume_utc=False)
    except Exception:
        # Best effort: a missing or malformed date leaves pubdate unset.
        pass
    else:
        mi.pubdate = pubdate

    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp

    # The metadata date is whatever more_recent() picks from MetadataDate
    # and ModifyDate.
    md = safe_parse_date(first_simple('//xmp:MetadataDate', root))
    mod = safe_parse_date(first_simple('//xmp:ModifyDate', root))
    fd = more_recent(md, mod)
    if fd is not None:
        mi.metadata_date = fd

    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            # Only ratings inside the inclusive 0..10 range are accepted.
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass

    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index

    for x in ('title_sort', 'author_sort'):
        # Use the first element that yields a non-empty value.
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break

    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:'+x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except Exception:
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        # A real list, so the emptiness test below is meaningful even where
        # filter() returns a lazy (always truthy) object.
        languages = [lang for lang in map(canonicalize_lang, languages) if lang]
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.iteritems():
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                # Store under the scheme that validated the value; previously
                # a valid ISBN was filed under the 'doi' key.
                identifiers[scheme] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)
    return mi
def metadata_from_xmp_packet(raw_bytes):
    '''
    Build a Metadata object from the contents of an XMP packet.

    The packet is parsed with parse_xmp_packet() and the Dublin Core, XMP,
    PDF, PRISM, pdfx and calibre properties found in it are copied onto a
    new Metadata object. Fields that are absent or cannot be parsed are
    left at their defaults.

    :param raw_bytes: The raw XMP packet, passed unchanged to
        parse_xmp_packet().
    :return: The populated Metadata object. Its title is the translated
        string 'Unknown' unless the packet supplies a dc:title.
    '''
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))

    title = first_alt('//dc:title', root)
    if title:
        mi.title = title

    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = authors

    # pdf:Keywords is only consulted when dc:subject yields nothing
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences('//pdf:Keywords', root)
    if tags:
        mi.tags = tags

    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments

    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]

    # Best effort: a missing or unparseable date leaves pubdate unset
    try:
        pubdate = parse_date(first_sequence('//dc:date', root) or first_simple('//xmp:CreateDate', root), assume_utc=False)
    except Exception:
        pass
    else:
        mi.pubdate = pubdate

    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp

    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            # Values outside 0..10 are ignored
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass

    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index

    # Use the first non-empty value for each of the sort fields
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break

    languages = multiple_sequences('//dc:language', root)
    if languages:
        # Build a real list so the emptiness test below is meaningful even
        # where filter() returns a lazy (always truthy) iterator.
        languages = [lang for lang in map(canonicalize_lang, languages) if lang]
        if languages:
            mi.languages = languages

    # Identifiers are collected from three places, in decreasing priority:
    # xmp:Identifier, then the prism/pdfx namespaces, then dc:identifier.
    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            # Keys are stored lower-cased, so test membership the same way
            key = scheme.lower()
            if key not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                if key == 'isbn':
                    val = check_isbn(val)
                elif key == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[key] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in (('doi', check_doi), ('isbn', check_isbn)):
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                # Store under the scheme that validated the value; this
                # used to be hard-coded to 'doi', mislabelling ISBNs.
                identifiers[scheme] = val

    if identifiers:
        mi.set_identifiers(identifiers)
    return mi
def metadata_from_xmp_packet(raw_bytes):
    '''
    Build a Metadata object from the contents of an XMP packet.

    The packet is parsed with parse_xmp_packet() and the Dublin Core, XMP,
    PDF, PRISM, pdfx and calibre properties found in it are copied onto a
    new Metadata object. Fields that are absent or cannot be parsed are
    left at their defaults.

    :param raw_bytes: The raw XMP packet, passed unchanged to
        parse_xmp_packet().
    :return: The populated Metadata object. Its title is the translated
        string 'Unknown' unless the packet supplies a dc:title.
    '''
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))

    title = first_alt('//dc:title', root)
    if title:
        mi.title = title

    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = authors

    # pdf:Keywords is only consulted when dc:subject yields nothing
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences(
        '//pdf:Keywords', root)
    if tags:
        mi.tags = tags

    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments

    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]

    # Best effort: a missing or unparseable date leaves pubdate unset
    try:
        pubdate = parse_date(
            first_sequence('//dc:date', root) or
            first_simple('//xmp:CreateDate', root),
            assume_utc=False)
    except Exception:
        pass
    else:
        mi.pubdate = pubdate

    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp

    md = first_simple('//xmp:MetadataDate', root)
    if md:
        # An unparseable metadata date is ignored rather than fatal
        try:
            mi.metadata_date = parse_date(md)
        except Exception:
            pass

    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            # Values outside 0..10 are ignored
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass

    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index

    # Use the first non-empty value for each of the sort fields
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break

    # These fields are stored as JSON text; invalid JSON is skipped
    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:' + x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except Exception:
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        # Build a real list so the emptiness test below is meaningful even
        # where filter() returns a lazy (always truthy) iterator.
        languages = [lang for lang in map(canonicalize_lang, languages) if lang]
        if languages:
            mi.languages = languages

    # Identifiers are collected from three places, in decreasing priority:
    # xmp:Identifier, then the prism/pdfx namespaces, then dc:identifier.
    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            # Keys are stored lower-cased, so test membership the same way
            key = scheme.lower()
            if key not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                if key == 'isbn':
                    val = check_isbn(val)
                elif key == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[key] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in (('doi', check_doi), ('isbn', check_isbn)):
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                # Store under the scheme that validated the value; this
                # used to be hard-coded to 'doi', mislabelling ISBNs.
                identifiers[scheme] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)
    return mi