Beispiel #1
0
    def test_legacy_adding_books(self):  # {{{
        'Test various adding books methods'
        from calibre.ebooks.metadata.book.base import Metadata
        legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
        mi = Metadata('Added Book0', authors=('Added Author',))
        with NamedTemporaryFile(suffix='.aff') as f:
            f.write(b'xxx')
            f.flush()
            T = partial(ET, 'add_books', ([f.name], ['AFF'], [mi]), old=old, legacy=legacy)
            T()(self)
            book_id = T(kwargs={'return_ids':True})(self)[1][0]
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
            T(kwargs={'add_duplicates':False})(self)
            mi.title = 'Added Book1'
            mi.uuid = 'uuu'
            T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            book_id = T(kwargs={'preserve_uuid':True})(self)
            self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
        with NamedTemporaryFile(suffix='.opf') as f:
            f.write(b'zzzz')
            f.flush()
            T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertFalse(legacy.new_api.formats(book_id))

        mi.title = 'Added Book2'
        T = partial(ET, 'create_book_entry', (mi,), old=old, legacy=legacy)
        T()
        T({'add_duplicates':False})
        T({'force_id':1000})
Beispiel #2
0
    def test_legacy_adding_books(self):  # {{{
        'Test various adding books methods'
        from calibre.ebooks.metadata.book.base import Metadata
        legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
        mi = Metadata('Added Book0', authors=('Added Author',))
        with NamedTemporaryFile(suffix='.aff') as f:
            f.write(b'xxx')
            f.flush()
            T = partial(ET, 'add_books', ([f.name], ['AFF'], [mi]), old=old, legacy=legacy)
            T()(self)
            book_id = T(kwargs={'return_ids':True})(self)[1][0]
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
            T(kwargs={'add_duplicates':False})(self)
            mi.title = 'Added Book1'
            mi.uuid = 'uuu'
            T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            book_id = T(kwargs={'preserve_uuid':True})(self)
            self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))

            T = partial(ET, 'add_format', old=old, legacy=legacy)
            T((0, 'AFF', BytesIO(b'fffff')))(self)
            T((0, 'AFF', BytesIO(b'fffff')))(self)
            T((0, 'AFF', BytesIO(b'fffff')), {'replace':True})(self)
        with NamedTemporaryFile(suffix='.opf') as f:
            f.write(b'zzzz')
            f.flush()
            T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertFalse(legacy.new_api.formats(book_id))

        mi.title = 'Added Book2'
        T = partial(ET, 'create_book_entry', (mi,), old=old, legacy=legacy)
        T()
        T({'add_duplicates':False})
        T({'force_id':1000})

        with NamedTemporaryFile(suffix='.txt') as f:
            f.write(b'tttttt')
            f.seek(0)
            bid = legacy.add_catalog(f.name, 'My Catalog')
            self.assertEqual(old.add_catalog(f.name, 'My Catalog'), bid)
            cache = legacy.new_api
            self.assertEqual(cache.formats(bid), ('TXT',))
            self.assertEqual(cache.field_for('title', bid), 'My Catalog')
            self.assertEqual(cache.field_for('authors', bid), ('calibre',))
            self.assertEqual(cache.field_for('tags', bid), (_('Catalog'),))
            self.assertTrue(bid < legacy.add_catalog(f.name, 'Something else'))
            self.assertEqual(legacy.add_catalog(f.name, 'My Catalog'), bid)
            self.assertEqual(old.add_catalog(f.name, 'My Catalog'), bid)

            bid = legacy.add_news(f.name, {'title':'Events', 'add_title_tag':True, 'custom_tags':('one', 'two')})
            self.assertEqual(cache.formats(bid), ('TXT',))
            self.assertEqual(cache.field_for('authors', bid), ('calibre',))
            self.assertEqual(cache.field_for('tags', bid), (_('News'), 'Events', 'one', 'two'))

        old.close()
Beispiel #3
0
def read_metadata(root):
    ans = Metadata(_('Unknown'), [_('Unknown')])
    prefixes, refines = read_prefixes(root), read_refines(root)
    identifiers = read_identifiers(root, prefixes, refines)
    ids = {}
    for key, vals in identifiers.iteritems():
        if key == 'calibre':
            ans.application_id = vals[0]
        elif key != 'uuid':
            ids[key] = vals[0]
    ans.set_identifiers(ids)
    ans.title = read_title(root, prefixes, refines) or ans.title
    ans.title_sort = read_title_sort(root, prefixes, refines) or ans.title_sort
    ans.languages = read_languages(root, prefixes, refines) or ans.languages
    auts, aus = [], []
    for a in read_authors(root, prefixes, refines):
        auts.append(a.name), aus.append(a.sort)
    ans.authors = auts or ans.authors
    ans.author_sort = authors_to_string(aus) or ans.author_sort
    bkp = read_book_producers(root, prefixes, refines)
    if bkp:
        ans.book_producer = bkp[0]
    pd = read_pubdate(root, prefixes, refines)
    if not is_date_undefined(pd):
        ans.pubdate = pd
    ts = read_timestamp(root, prefixes, refines)
    if not is_date_undefined(ts):
        ans.timestamp = ts
    lm = read_last_modified(root, prefixes, refines)
    if not is_date_undefined(lm):
        ans.last_modified = lm
    return ans
Beispiel #4
0
def test(scale=0.25):
    from PyQt5.Qt import QLabel, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    from calibre.gui2 import Application
    app = Application([])
    mi = Metadata('Unknown', ['Kovid Goyal', 'John & Doe', 'Author'])
    mi.series = 'A series & styles'
    m = QMainWindow()
    sa = QScrollArea(m)
    w = QWidget(m)
    sa.setWidget(w)
    l = QGridLayout(w)
    w.setLayout(l), l.setSpacing(30)
    scale *= w.devicePixelRatioF()
    labels = []
    for r, color in enumerate(sorted(default_color_themes)):
        for c, style in enumerate(sorted(all_styles())):
            mi.series_index = c + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            scale_cover(prefs, scale)
            img = generate_cover(mi, prefs=prefs, as_qimage=True)
            img.setDevicePixelRatio(w.devicePixelRatioF())
            la = QLabel()
            la.setPixmap(QPixmap.fromImage(img))
            l.addWidget(la, r, c)
            labels.append(la)
    m.setCentralWidget(sa)
    w.resize(w.sizeHint())
    m.show()
    app.exec_()
Beispiel #5
0
def add_catalog(cache, path, title):
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    with lopen(path, 'rb') as stream, cache.write_lock:
        matches = cache._search('title:="%s" and tags:="%s"' % (title.replace('"', '\\"'), _('Catalog')), None)
        db_id = None
        if matches:
            db_id = list(matches)[0]
        try:
            mi = get_metadata(stream, fmt)
            mi.authors = ['calibre']
        except:
            mi = Metadata(title, ['calibre'])
        mi.title, mi.authors = title, ['calibre']
        mi.tags = [_('Catalog')]
        mi.pubdate = mi.timestamp = utcnow()
        if fmt == 'mobi':
            mi.cover, mi.cover_data = None, (None, None)
        if db_id is None:
            db_id = cache._create_book_entry(mi, apply_import_tags=False)
        else:
            cache._set_metadata(db_id, mi)
        cache._add_format(db_id, fmt, stream)

    return db_id
Beispiel #6
0
def test(scale=0.5):
    from PyQt5.Qt import QLabel, QApplication, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    app = QApplication([])
    mi = Metadata('xxx', ['Kovid Goyal', 'John Q. Doe', 'Author'])
    mi.series = 'A series of styles'
    m = QMainWindow()
    sa = QScrollArea(m)
    w = QWidget(m)
    sa.setWidget(w)
    l = QGridLayout(w)
    w.setLayout(l), l.setSpacing(30)
    labels = []
    for r, color in enumerate(sorted(default_color_themes)):
        for c, style in enumerate(sorted(all_styles())):
            mi.series_index = c + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            for x in ('cover_width', 'cover_height', 'title_font_size', 'subtitle_font_size', 'footer_font_size'):
                prefs[x] = int(scale * prefs[x])
            img = generate_cover(mi, prefs=prefs, as_qimage=True)
            la = QLabel()
            la.setPixmap(QPixmap.fromImage(img))
            l.addWidget(la, r, c)
            labels.append(la)
    m.setCentralWidget(sa)
    w.resize(w.sizeHint())
    m.show()
    app.exec_()
Beispiel #7
0
def add_catalog(cache, path, title):
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    with lopen(path, 'rb') as stream:
        with cache.write_lock:
            matches = cache._search(
                'title:="%s" and tags:="%s"' %
                (title.replace('"', '\\"'), _('Catalog')), None)
            db_id = None
            if matches:
                db_id = list(matches)[0]
            try:
                mi = get_metadata(stream, fmt)
                mi.authors = ['calibre']
            except:
                mi = Metadata(title, ['calibre'])
            mi.title, mi.authors = title, ['calibre']
            mi.tags = [_('Catalog')]
            mi.pubdate = mi.timestamp = utcnow()
            if fmt == 'mobi':
                mi.cover, mi.cover_data = None, (None, None)
            if db_id is None:
                db_id = cache._create_book_entry(mi, apply_import_tags=False)
            else:
                cache._set_metadata(db_id, mi)
        cache.add_format(
            db_id, fmt,
            stream)  # Cant keep write lock since post-import hooks might run

    return db_id
Beispiel #8
0
def add_catalog(cache, path, title, dbapi=None):
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    new_book_added = False
    with lopen(path, 'rb') as stream:
        with cache.write_lock:
            matches = cache._search('title:="%s" and tags:="%s"' % (title.replace('"', '\\"'), _('Catalog')), None)
            db_id = None
            if matches:
                db_id = list(matches)[0]
            try:
                mi = get_metadata(stream, fmt)
                mi.authors = ['calibre']
            except:
                mi = Metadata(title, ['calibre'])
            mi.title, mi.authors = title, ['calibre']
            mi.author_sort = 'calibre'  # The MOBI/AZW3 format sets author sort to date
            mi.tags = [_('Catalog')]
            mi.pubdate = mi.timestamp = utcnow()
            if fmt == 'mobi':
                mi.cover, mi.cover_data = None, (None, None)
            if db_id is None:
                db_id = cache._create_book_entry(mi, apply_import_tags=False)
                new_book_added = True
            else:
                cache._set_metadata(db_id, mi)
        cache.add_format(db_id, fmt, stream, dbapi=dbapi)  # Cant keep write lock since post-import hooks might run

    return db_id, new_book_added
Beispiel #9
0
def read_metadata(root, ver=None, return_extra_data=False):
    ans = Metadata(_('Unknown'), [_('Unknown')])
    prefixes, refines = read_prefixes(root), read_refines(root)
    identifiers = read_identifiers(root, prefixes, refines)
    ids = {}
    for key, vals in identifiers.iteritems():
        if key == 'calibre':
            ans.application_id = vals[0]
        elif key == 'uuid':
            ans.uuid = vals[0]
        else:
            ids[key] = vals[0]
    ans.set_identifiers(ids)
    ans.title = read_title(root, prefixes, refines) or ans.title
    ans.title_sort = read_title_sort(root, prefixes, refines) or ans.title_sort
    ans.languages = read_languages(root, prefixes, refines) or ans.languages
    auts, aus = [], []
    for a in read_authors(root, prefixes, refines):
        auts.append(a.name), aus.append(a.sort)
    ans.authors = auts or ans.authors
    ans.author_sort = authors_to_string(aus) or ans.author_sort
    bkp = read_book_producers(root, prefixes, refines)
    if bkp:
        if bkp[0]:
            ans.book_producer = bkp[0]
    pd = read_pubdate(root, prefixes, refines)
    if not is_date_undefined(pd):
        ans.pubdate = pd
    ts = read_timestamp(root, prefixes, refines)
    if not is_date_undefined(ts):
        ans.timestamp = ts
    lm = read_last_modified(root, prefixes, refines)
    if not is_date_undefined(lm):
        ans.last_modified = lm
    ans.comments = read_comments(root, prefixes, refines) or ans.comments
    ans.publisher = read_publisher(root, prefixes, refines) or ans.publisher
    ans.tags = read_tags(root, prefixes, refines) or ans.tags
    ans.rating = read_rating(root, prefixes, refines) or ans.rating
    s, si = read_series(root, prefixes, refines)
    if s:
        ans.series, ans.series_index = s, si
    ans.author_link_map = read_author_link_map(root, prefixes, refines) or ans.author_link_map
    ans.user_categories = read_user_categories(root, prefixes, refines) or ans.user_categories
    for name, fm in (read_user_metadata(root, prefixes, refines) or {}).iteritems():
        ans.set_user_metadata(name, fm)
    if return_extra_data:
        ans = ans, ver, read_raster_cover(root, prefixes, refines), first_spine_item(root, prefixes, refines)
    return ans
Beispiel #10
0
def read_metadata(root):
    ans = Metadata(_('Unknown'), [_('Unknown')])
    prefixes, refines = read_prefixes(root), read_refines(root)
    identifiers = read_identifiers(root, prefixes, refines)
    ids = {}
    for key, vals in identifiers.iteritems():
        if key == 'calibre':
            ans.application_id = vals[0]
        elif key != 'uuid':
            ids[key] = vals[0]
    ans.set_identifiers(ids)
    ans.title = read_title(root, prefixes, refines) or ans.title
    ans.title_sort = read_title_sort(root, prefixes, refines) or ans.title_sort
    ans.languages = read_languages(root, prefixes, refines) or ans.languages

    return ans
Beispiel #11
0
    def data2mi(self, item):
        """Converts a single metadata answer in the form of a dict to a MetadataInformation object"""

        mi = Metadata(_('Unknown'))

        # Regular metadata
        mi.title = item.get('title', None)
        mi.authors = item.get('authors', [])
        mi.publisher = item.get('publisher', None)

        if 'id' in item.keys(): mi.set_identifier(self.idkey, item['id'])
        if 'doi' in item.keys(): mi.set_identifier('doi', item['doi'])
        if 'isbn' in item.keys(): mi.set_identifier('isbn', item['isbn'])

        if 'updated' in item.keys():
            mi.pubdate = parse_date(item['updated'], assume_utc=True)

        if 'series' in item.keys():
            mi.series = item['series']
            mi.series_index = self.format_series_index(
                item.get('series_index'), None)

        if 'year' in item.keys():
            mi.pubdate = parse_date(item['year'], assume_utc=True)

        if 'abstract' in item.keys():
            mi.comments = self.format_abstract(item['abstract'])

        if 'language' in item.keys(): mi.language = item['language']

        if 'journal' in item.keys():
            mi.series = item['journal']
            mi.series_index = self.format_series_index(item.get('volume'),
                                                       item.get('number'))

        if 'subject' in item.keys():
            tags = set([])
            for s in item['subject']:
                tags.update(msc_tags(s))
                tags.update(arxiv_tags(s))

            mi.tags = list(sorted(tags))

        return mi
Beispiel #12
0
    def data2mi(self, item):
        """Converts a single metadata answer in the form of a dict to a MetadataInformation object"""

        mi = Metadata(_('Unknown'))

        # Regular metadata
        mi.title = item.get('title', None)
        mi.authors = item.get('authors', [])
        mi.publisher = item.get('publisher', None)

        if 'id' in item.keys(): mi.set_identifier(self.idkey, item['id'])
        if 'doi' in item.keys(): mi.set_identifier('doi', item['doi'])
        if 'isbn' in item.keys(): mi.set_identifier('isbn', item['isbn'])

        if 'updated' in item.keys(): mi.pubdate = parse_date(item['updated'], assume_utc=True)

        if 'series' in item.keys():
            mi.series = item['series']
            mi.series_index = self.format_series_index(item.get('series_index'), None)

        if 'year' in item.keys(): mi.pubdate = parse_date(item['year'], assume_utc=True)

        if 'abstract' in item.keys(): mi.comments = self.format_abstract(item['abstract'])

        if 'language' in item.keys(): mi.language = item['language']

        if 'journal' in item.keys():
            mi.series = item['journal']
            mi.series_index = self.format_series_index(item.get('volume'), item.get('number'))

        if 'subject' in item.keys():
            tags = set([])
            for s in item['subject']:
                tags.update(msc_tags(s))
                tags.update(arxiv_tags(s))

            mi.tags = list(sorted(tags))

        return mi
Beispiel #13
0
def metadata_from_xmp_packet(raw_bytes):
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_("Unknown"))
    title = first_alt("//dc:title", root)
    if title:
        mi.title = title
    authors = multiple_sequences("//dc:creator", root)
    if authors:
        mi.authors = authors
    tags = multiple_sequences("//dc:subject", root) or multiple_sequences("//pdf:Keywords", root)
    if tags:
        mi.tags = tags
    comments = first_alt("//dc:description", root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences("//dc:publisher", root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(
            first_sequence("//dc:date", root) or first_simple("//xmp:CreateDate", root), assume_utc=False
        )
    except:
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple("//xmp:CreatorTool", root)
    if bkp:
        mi.book_producer = bkp
    md = first_simple("//xmp:MetadataDate", root)
    if md:
        try:
            mi.metadata_date = parse_date(md)
        except:
            pass
    rating = first_simple("//calibre:rating", root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ("title_sort", "author_sort"):
        for elem in XPath("//calibre:" + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ("author_link_map", "user_categories"):
        val = first_simple("//calibre:" + x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except:
                pass

    languages = multiple_sequences("//dc:language", root)
    if languages:
        languages = filter(None, map(canonicalize_lang, languages))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath("//xmp:Identifier")(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ("prism", "pdfx"):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple("//%s:%s" % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == "isbn":
                    val = check_isbn(val)
                elif scheme == "doi":
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in {"doi": check_doi, "isbn": check_isbn}.iteritems():
        if scheme not in identifiers:
            val = check_func(first_simple("//dc:identifier", root))
            if val:
                identifiers["doi"] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi
Beispiel #14
0
    def _get_metadata(self, book_id, get_user_categories=True):  # {{{
        mi = Metadata(None, template_cache=self.formatter_template_cache)
        author_ids = self._field_ids_for('authors', book_id)
        aut_list = [self._author_data(i) for i in author_ids]
        aum = []
        aus = {}
        aul = {}
        for rec in aut_list:
            aut = rec['name']
            aum.append(aut)
            aus[aut] = rec['sort']
            aul[aut] = rec['link']
        mi.title = self._field_for('title',
                                   book_id,
                                   default_value=_('Unknown'))
        mi.authors = aum
        mi.author_sort = self._field_for('author_sort',
                                         book_id,
                                         default_value=_('Unknown'))
        mi.author_sort_map = aus
        mi.author_link_map = aul
        mi.comments = self._field_for('comments', book_id)
        mi.publisher = self._field_for('publisher', book_id)
        n = nowf()
        mi.timestamp = self._field_for('timestamp', book_id, default_value=n)
        mi.pubdate = self._field_for('pubdate', book_id, default_value=n)
        mi.uuid = self._field_for('uuid', book_id, default_value='dummy')
        mi.title_sort = self._field_for('sort',
                                        book_id,
                                        default_value=_('Unknown'))
        mi.book_size = self._field_for('size', book_id, default_value=0)
        mi.ondevice_col = self._field_for('ondevice',
                                          book_id,
                                          default_value='')
        mi.last_modified = self._field_for('last_modified',
                                           book_id,
                                           default_value=n)
        formats = self._field_for('formats', book_id)
        mi.format_metadata = {}
        mi.languages = list(self._field_for('languages', book_id))
        if not formats:
            good_formats = None
        else:
            mi.format_metadata = FormatMetadata(self, book_id, formats)
            good_formats = FormatsList(formats, mi.format_metadata)
        mi.formats = good_formats
        mi.has_cover = _('Yes') if self._field_for(
            'cover', book_id, default_value=False) else ''
        mi.tags = list(self._field_for('tags', book_id, default_value=()))
        mi.series = self._field_for('series', book_id)
        if mi.series:
            mi.series_index = self._field_for('series_index',
                                              book_id,
                                              default_value=1.0)
        mi.rating = self._field_for('rating', book_id)
        mi.set_identifiers(
            self._field_for('identifiers', book_id, default_value={}))
        mi.application_id = book_id
        mi.id = book_id
        composites = []
        for key, meta in self.field_metadata.custom_iteritems():
            mi.set_user_metadata(key, meta)
            if meta['datatype'] == 'composite':
                composites.append(key)
            else:
                val = self._field_for(key, book_id)
                if isinstance(val, tuple):
                    val = list(val)
                extra = self._field_for(key + '_index', book_id)
                mi.set(key, val=val, extra=extra)
        for key in composites:
            mi.set(key, val=self._composite_for(key, book_id, mi))

        user_cat_vals = {}
        if get_user_categories:
            user_cats = self.backend.prefs['user_categories']
            for ucat in user_cats:
                res = []
                for name, cat, ign in user_cats[ucat]:
                    v = mi.get(cat, None)
                    if isinstance(v, list):
                        if name in v:
                            res.append([name, cat])
                    elif name == v:
                        res.append([name, cat])
                user_cat_vals[ucat] = res
        mi.user_categories = user_cat_vals

        return mi
Beispiel #15
0
    def test_legacy_adding_books(self):  # {{{
        "Test various adding/deleting books methods"
        from calibre.ebooks.metadata.book.base import Metadata

        legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
        mi = Metadata("Added Book0", authors=("Added Author",))
        with NamedTemporaryFile(suffix=".aff") as f:
            f.write(b"xxx")
            f.flush()
            T = partial(ET, "add_books", ([f.name], ["AFF"], [mi]), old=old, legacy=legacy)
            T()(self)
            book_id = T(kwargs={"return_ids": True})(self)[1][0]
            self.assertEqual(legacy.new_api.formats(book_id), ("AFF",))
            T(kwargs={"add_duplicates": False})(self)
            mi.title = "Added Book1"
            mi.uuid = "uuu"
            T = partial(ET, "import_book", (mi, [f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            book_id = T(kwargs={"preserve_uuid": True})(self)
            self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            self.assertEqual(legacy.new_api.formats(book_id), ("AFF",))

            T = partial(ET, "add_format", old=old, legacy=legacy)
            T((0, "AFF", BytesIO(b"fffff")))(self)
            T((0, "AFF", BytesIO(b"fffff")))(self)
            T((0, "AFF", BytesIO(b"fffff")), {"replace": True})(self)
        with NamedTemporaryFile(suffix=".opf") as f:
            f.write(b"zzzz")
            f.flush()
            T = partial(ET, "import_book", (mi, [f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertFalse(legacy.new_api.formats(book_id))

        mi.title = "Added Book2"
        T = partial(ET, "create_book_entry", (mi,), old=old, legacy=legacy)
        T()
        T({"add_duplicates": False})
        T({"force_id": 1000})

        with NamedTemporaryFile(suffix=".txt") as f:
            f.write(b"tttttt")
            f.seek(0)
            bid = legacy.add_catalog(f.name, "My Catalog")
            self.assertEqual(old.add_catalog(f.name, "My Catalog"), bid)
            cache = legacy.new_api
            self.assertEqual(cache.formats(bid), ("TXT",))
            self.assertEqual(cache.field_for("title", bid), "My Catalog")
            self.assertEqual(cache.field_for("authors", bid), ("calibre",))
            self.assertEqual(cache.field_for("tags", bid), (_("Catalog"),))
            self.assertTrue(bid < legacy.add_catalog(f.name, "Something else"))
            self.assertEqual(legacy.add_catalog(f.name, "My Catalog"), bid)
            self.assertEqual(old.add_catalog(f.name, "My Catalog"), bid)

            bid = legacy.add_news(f.name, {"title": "Events", "add_title_tag": True, "custom_tags": ("one", "two")})
            self.assertEqual(cache.formats(bid), ("TXT",))
            self.assertEqual(cache.field_for("authors", bid), ("calibre",))
            self.assertEqual(cache.field_for("tags", bid), (_("News"), "Events", "one", "two"))

        self.assertTrue(legacy.cover(1, index_is_id=True))
        origcov = legacy.cover(1, index_is_id=True)
        self.assertTrue(legacy.has_cover(1))
        legacy.remove_cover(1)
        self.assertFalse(legacy.has_cover(1))
        self.assertFalse(legacy.cover(1, index_is_id=True))
        legacy.set_cover(3, origcov)
        self.assertEqual(legacy.cover(3, index_is_id=True), origcov)
        self.assertTrue(legacy.has_cover(3))

        self.assertTrue(legacy.format(1, "FMT1", index_is_id=True))
        legacy.remove_format(1, "FMT1", index_is_id=True)
        self.assertIsNone(legacy.format(1, "FMT1", index_is_id=True))

        legacy.delete_book(1)
        old.delete_book(1)
        self.assertNotIn(1, legacy.all_ids())
        legacy.dump_metadata((2, 3))
        old.close()
Beispiel #16
0
    def merge(self, results, min_year, do_asr=True):
        ans = Metadata(_('Unknown'))

        # We assume the shortest title has the least cruft in it
        ans.title = self.length_merge('title', results, null_value=ans.title)

        # No harm in having extra authors, maybe something useful like an
        # editor or translator
        ans.authors = self.length_merge('authors', results,
                null_value=ans.authors, shortest=False)

        # We assume the shortest publisher has the least cruft in it
        ans.publisher = self.length_merge('publisher', results,
                null_value=ans.publisher)

        # We assume the smallest set of tags has the least cruft in it
        ans.tags = self.length_merge('tags', results,
                null_value=ans.tags, shortest=msprefs['fewer_tags'])

        # We assume the longest series has the most info in it
        ans.series = self.length_merge('series', results,
                null_value=ans.series, shortest=False)
        for r in results:
            if r.series and r.series == ans.series:
                ans.series_index = r.series_index
                break

        # Average the rating over all sources
        ratings = []
        for r in results:
            rating = r.rating
            if rating and rating > 0 and rating <= 5:
                ratings.append(rating)
        if ratings:
            ans.rating = int(round(sum(ratings)/len(ratings)))

        # Smallest language is likely to be valid
        ans.language = self.length_merge('language', results,
                null_value=ans.language)

        # Choose longest comments
        ans.comments = self.length_merge('comments', results,
                null_value=ans.comments, shortest=False)

        # Published date
        if min_year:
            for r in results:
                year = getattr(r.pubdate, 'year', None)
                if year == min_year:
                    ans.pubdate = r.pubdate
                    break
            if getattr(ans.pubdate, 'year', None) == min_year:
                min_date = datetime(min_year, ans.pubdate.month, ans.pubdate.day,
                                    tzinfo=utc_tz)
            else:
                min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
            ans.pubdate = min_date
        else:
            min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
            for r in results:
                if r.pubdate is not None:
                    candidate = as_utc(r.pubdate)
                    if candidate < min_date:
                        min_date = candidate
            if min_date.year < 3000:
                ans.pubdate = min_date

        # Identifiers
        for r in results:
            ans.identifiers.update(r.identifiers)

        # Cover URL
        ans.has_cached_cover_url = bool([r for r in results if
            getattr(r, 'has_cached_cover_url', False)])

        # Merge any other fields with no special handling (random merge)
        touched_fields = set()
        for r in results:
            if hasattr(r, 'identify_plugin'):
                touched_fields |= r.identify_plugin.touched_fields

        for f in touched_fields:
            if f.startswith('identifier:') or not ans.is_null(f):
                continue
            setattr(ans, f, self.random_merge(f, results,
                null_value=getattr(ans, f)))

        if do_asr:
            avg = [x.relevance_in_source for x in results]
            avg = sum(avg)/len(avg)
            ans.average_source_relevance = avg

        return ans
Beispiel #17
0
    def _get_metadata(self, book_id, get_user_categories=True):  # {{{
        mi = Metadata(None, template_cache=self.formatter_template_cache)
        author_ids = self._field_ids_for('authors', book_id)
        aut_list = [self._author_data(i) for i in author_ids]
        aum = []
        aus = {}
        aul = {}
        for rec in aut_list:
            aut = rec['name']
            aum.append(aut)
            aus[aut] = rec['sort']
            aul[aut] = rec['link']
        mi.title       = self._field_for('title', book_id,
                default_value=_('Unknown'))
        mi.authors     = aum
        mi.author_sort = self._field_for('author_sort', book_id,
                default_value=_('Unknown'))
        mi.author_sort_map = aus
        mi.author_link_map = aul
        mi.comments    = self._field_for('comments', book_id)
        mi.publisher   = self._field_for('publisher', book_id)
        n = nowf()
        mi.timestamp   = self._field_for('timestamp', book_id, default_value=n)
        mi.pubdate     = self._field_for('pubdate', book_id, default_value=n)
        mi.uuid        = self._field_for('uuid', book_id,
                default_value='dummy')
        mi.title_sort  = self._field_for('sort', book_id,
                default_value=_('Unknown'))
        mi.book_size   = self._field_for('size', book_id, default_value=0)
        mi.ondevice_col = self._field_for('ondevice', book_id, default_value='')
        mi.last_modified = self._field_for('last_modified', book_id,
                default_value=n)
        formats = self._field_for('formats', book_id)
        mi.format_metadata = {}
        mi.languages = list(self._field_for('languages', book_id))
        if not formats:
            good_formats = None
        else:
            mi.format_metadata = FormatMetadata(self, book_id, formats)
            good_formats = FormatsList(formats, mi.format_metadata)
        mi.formats = good_formats
        mi.has_cover = _('Yes') if self._field_for('cover', book_id,
                default_value=False) else ''
        mi.tags = list(self._field_for('tags', book_id, default_value=()))
        mi.series = self._field_for('series', book_id)
        if mi.series:
            mi.series_index = self._field_for('series_index', book_id,
                    default_value=1.0)
        mi.rating = self._field_for('rating', book_id)
        mi.set_identifiers(self._field_for('identifiers', book_id,
            default_value={}))
        mi.application_id = book_id
        mi.id = book_id
        composites = []
        for key, meta in self.field_metadata.custom_iteritems():
            mi.set_user_metadata(key, meta)
            if meta['datatype'] == 'composite':
                composites.append(key)
            else:
                val = self._field_for(key, book_id)
                if isinstance(val, tuple):
                    val = list(val)
                extra = self._field_for(key+'_index', book_id)
                mi.set(key, val=val, extra=extra)
        for key in composites:
            mi.set(key, val=self._composite_for(key, book_id, mi))

        user_cat_vals = {}
        if get_user_categories:
            user_cats = self.backend.prefs['user_categories']
            for ucat in user_cats:
                res = []
                for name,cat,ign in user_cats[ucat]:
                    v = mi.get(cat, None)
                    if isinstance(v, list):
                        if name in v:
                            res.append([name,cat])
                    elif name == v:
                        res.append([name,cat])
                user_cat_vals[ucat] = res
        mi.user_categories = user_cat_vals

        return mi
Beispiel #18
0
def metadata_from_xmp_packet(raw_bytes):
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title.startswith(r'\376\377'):
        # corrupted XMP packet generated by Nitro PDF. See
        # https://bugs.launchpad.net/calibre/+bug/1541981
        raise ValueError('Corrupted XMP metadata packet detected, probably generated by Nitro PDF')
    if title:
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = authors
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences('//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(first_sequence('//dc:date', root) or first_simple('//xmp:CreateDate', root), assume_utc=False)
    except:
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    md = safe_parse_date(first_simple('//xmp:MetadataDate', root))
    mod = safe_parse_date(first_simple('//xmp:ModifyDate', root))
    fd = more_recent(md, mod)
    if fd is not None:
        mi.metadata_date = fd
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:'+x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except:
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = filter(None, map(canonicalize_lang, languages))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.iteritems():
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                identifiers['doi'] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi
Beispiel #19
0
def metadata_from_xmp_packet(raw_bytes):
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title:
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = authors
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences('//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(first_sequence('//dc:date', root) or first_simple('//xmp:CreateDate', root), assume_utc=False)
    except:
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = filter(None, map(canonicalize_lang, languages))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.iteritems():
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                identifiers['doi'] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    return mi
Beispiel #20
0
def metadata_from_xmp_packet(raw_bytes):
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title:
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = authors
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences(
        '//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(first_sequence('//dc:date', root)
                             or first_simple('//xmp:CreateDate', root),
                             assume_utc=False)
    except:
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    md = first_simple('//xmp:MetadataDate', root)
    if md:
        try:
            mi.metadata_date = parse_date(md)
        except:
            pass
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:' + x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except:
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = filter(None, map(canonicalize_lang, languages))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in {
            'doi': check_doi,
            'isbn': check_isbn
    }.iteritems():
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                identifiers['doi'] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi