Beispiel #1
0
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values
    if 'meta' not in extensions:
        extensions.append('meta')
    md = create_markdown_object(extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    m = md.Meta
    for k, v in iteritems({'date':'pubdate', 'summary':'comments'}):
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            if not mf.get('is_multiple'):
                val = val[0]
            if k == 'series':
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                try:
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    continue
            if mf.get('datatype') == 'datetime':
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
Beispiel #2
0
    def metadata(self):
        mi = Metadata(_('Unknown'))
        dp_name, ap_name = self.get_document_properties_names()
        if dp_name:
            try:
                raw = self.read(dp_name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi, self.namespace.XPath)
        if mi.is_null('language'):
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi, self.namespace.XPath)

        ap_name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if ap_name:
            try:
                raw = self.read(ap_name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)

        return mi
Beispiel #3
0
 def __init__(self, storage_id, lpath, other=None):
     Metadata.__init__(self, _('Unknown'), other=other)
     self.storage_id, self.lpath = storage_id, lpath
     self.lpath = self.path = self.lpath.replace(os.sep, '/')
     self.mtp_relpath = tuple([icu_lower(x) for x in self.lpath.split('/')])
     self.datetime = utcnow().timetuple()
     self.thumbail = None
Beispiel #4
0
 def opdsToMetadata(self, opdsBookStructure):
     authors = opdsBookStructure.author.replace(u'& ', u'&')
     metadata = Metadata(opdsBookStructure.title, authors.split(u'&'))
     metadata.uuid = opdsBookStructure.id.replace('urn:uuid:', '', 1)
     rawTimestamp = opdsBookStructure.updated
     parsableTimestamp = re.sub('((\.[0-9]+)?\+00:00|Z)$', '', rawTimestamp)
     metadata.timestamp = datetime.datetime.strptime(parsableTimestamp, '%Y-%m-%dT%H:%M:%S')
     tags = []
     summary = opdsBookStructure.get(u'summary', u'')
     summarylines = summary.splitlines()
     for summaryline in summarylines:
         if summaryline.startswith(u'TAGS: '):
             tagsline = summaryline.replace(u'TAGS: ', u'')
             tagsline = tagsline.replace(u'<br />',u'')
             tagsline = tagsline.replace(u', ', u',')
             tags = tagsline.split(u',')
     metadata.tags = tags
     bookDownloadUrls = []
     links = opdsBookStructure.get('links', [])
     for link in links:
         url = link.get('href', '')
         bookType = link.get('type', '')
         # Skip covers and thumbnails
         if not bookType.startswith('image/'):
             if bookType == 'application/epub+zip':
                 # EPUB books are preferred and always put at the head of the list if found
                 bookDownloadUrls.insert(0, url)
             else:
                 # Formats other than EPUB (eg. AZW), are appended as they are found
                 bookDownloadUrls.append(url)
     metadata.links = bookDownloadUrls
     return metadata
Beispiel #5
0
    def test_legacy_adding_books(self):  # {{{
        'Test various adding books methods'
        from calibre.ebooks.metadata.book.base import Metadata
        legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
        mi = Metadata('Added Book0', authors=('Added Author',))
        with NamedTemporaryFile(suffix='.aff') as f:
            f.write(b'xxx')
            f.flush()
            T = partial(ET, 'add_books', ([f.name], ['AFF'], [mi]), old=old, legacy=legacy)
            T()(self)
            book_id = T(kwargs={'return_ids':True})(self)[1][0]
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
            T(kwargs={'add_duplicates':False})(self)
            mi.title = 'Added Book1'
            mi.uuid = 'uuu'
            T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            book_id = T(kwargs={'preserve_uuid':True})(self)
            self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
        with NamedTemporaryFile(suffix='.opf') as f:
            f.write(b'zzzz')
            f.flush()
            T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertFalse(legacy.new_api.formats(book_id))

        mi.title = 'Added Book2'
        T = partial(ET, 'create_book_entry', (mi,), old=old, legacy=legacy)
        T()
        T({'add_duplicates':False})
        T({'force_id':1000})
Beispiel #6
0
    def to_metadata_for_single_entry(self, log, ozon_id, title, authors):  # {{{

        # parsing javascript data from the redirect page
        mi = Metadata(title, authors)
        mi.identifiers = {'ozon': ozon_id}

        return mi
Beispiel #7
0
def add_catalog(cache, path, title, dbapi=None):
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    new_book_added = False
    with lopen(path, 'rb') as stream:
        with cache.write_lock:
            matches = cache._search('title:="%s" and tags:="%s"' % (title.replace('"', '\\"'), _('Catalog')), None)
            db_id = None
            if matches:
                db_id = list(matches)[0]
            try:
                mi = get_metadata(stream, fmt)
                mi.authors = ['calibre']
            except:
                mi = Metadata(title, ['calibre'])
            mi.title, mi.authors = title, ['calibre']
            mi.author_sort = 'calibre'  # The MOBI/AZW3 format sets author sort to date
            mi.tags = [_('Catalog')]
            mi.pubdate = mi.timestamp = utcnow()
            if fmt == 'mobi':
                mi.cover, mi.cover_data = None, (None, None)
            if db_id is None:
                db_id = cache._create_book_entry(mi, apply_import_tags=False)
                new_book_added = True
            else:
                cache._set_metadata(db_id, mi)
        cache.add_format(db_id, fmt, stream, dbapi=dbapi)  # Cant keep write lock since post-import hooks might run

    return db_id, new_book_added
Beispiel #8
0
    def get_metadata_from_detail(self, log, entry, title, authors, identifiers):  # {{{
        title = unicode(entry.xpath(u'normalize-space(.//h1[@itemprop="name"][1]/text())'))
        # log.debug(u'Tile (from_detail): -----> %s' % title)

        author = unicode(entry.xpath(u'normalize-space(.//a[contains(@href, "person")][1]/text())'))
        # log.debug(u'Author (from_detail): -----> %s' % author)

        norm_authors = map(_normalizeAuthorNameWithInitials, map(unicode.strip, unicode(author).split(u',')))
        mi = Metadata(title, norm_authors)

        ozon_id = entry.xpath(u'substring-before(substring-after(normalize-space(.//a[starts-with(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
        if ozon_id:
            # log.debug(u'ozon_id (from_detail): -----> %s' % ozon_id)
            mi.identifiers = {'ozon':ozon_id}

        mi.ozon_cover_url = None
        cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
        if cover:
            mi.ozon_cover_url = _translateToBigCoverUrl(cover)
            # log.debug(u'mi.ozon_cover_url  (from_detail): -----> %s' % mi.ozon_cover_url)

        mi.rating = self.get_rating(entry)
        # log.debug(u'mi.rating  (from_detail): -----> %s' % mi.rating)
        if not mi.rating:
            log.debug('No rating (from_detail) found. ozon_id:%s'%ozon_id)

        return mi
Beispiel #9
0
def test(scale=0.5):
    from PyQt5.Qt import QLabel, QApplication, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    app = QApplication([])
    mi = Metadata('xxx', ['Kovid Goyal', 'John Q. Doe', 'Author'])
    mi.series = 'A series of styles'
    m = QMainWindow()
    sa = QScrollArea(m)
    w = QWidget(m)
    sa.setWidget(w)
    l = QGridLayout(w)
    w.setLayout(l), l.setSpacing(30)
    labels = []
    for r, color in enumerate(sorted(default_color_themes)):
        for c, style in enumerate(sorted(all_styles())):
            mi.series_index = c + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            for x in ('cover_width', 'cover_height', 'title_font_size', 'subtitle_font_size', 'footer_font_size'):
                prefs[x] = int(scale * prefs[x])
            img = generate_cover(mi, prefs=prefs, as_qimage=True)
            la = QLabel()
            la.setPixmap(QPixmap.fromImage(img))
            l.addWidget(la, r, c)
            labels.append(la)
    m.setCentralWidget(sa)
    w.resize(w.sizeHint())
    m.show()
    app.exec_()
Beispiel #10
0
def test(scale=0.25):
    from PyQt5.Qt import QLabel, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    from calibre.gui2 import Application
    app = Application([])
    mi = Metadata('Unknown', ['Kovid Goyal', 'John & Doe', 'Author'])
    mi.series = 'A series & styles'
    m = QMainWindow()
    sa = QScrollArea(m)
    w = QWidget(m)
    sa.setWidget(w)
    l = QGridLayout(w)
    w.setLayout(l), l.setSpacing(30)
    scale *= w.devicePixelRatioF()
    labels = []
    for r, color in enumerate(sorted(default_color_themes)):
        for c, style in enumerate(sorted(all_styles())):
            mi.series_index = c + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            scale_cover(prefs, scale)
            img = generate_cover(mi, prefs=prefs, as_qimage=True)
            img.setDevicePixelRatio(w.devicePixelRatioF())
            la = QLabel()
            la.setPixmap(QPixmap.fromImage(img))
            l.addWidget(la, r, c)
            labels.append(la)
    m.setCentralWidget(sa)
    w.resize(w.sizeHint())
    m.show()
    app.exec_()
Beispiel #11
0
def add_catalog(cache, path, title):
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    with lopen(path, 'rb') as stream, cache.write_lock:
        matches = cache._search('title:="%s" and tags:="%s"' % (title.replace('"', '\\"'), _('Catalog')), None)
        db_id = None
        if matches:
            db_id = list(matches)[0]
        try:
            mi = get_metadata(stream, fmt)
            mi.authors = ['calibre']
        except:
            mi = Metadata(title, ['calibre'])
        mi.title, mi.authors = title, ['calibre']
        mi.tags = [_('Catalog')]
        mi.pubdate = mi.timestamp = utcnow()
        if fmt == 'mobi':
            mi.cover, mi.cover_data = None, (None, None)
        if db_id is None:
            db_id = cache._create_book_entry(mi, apply_import_tags=False)
        else:
            cache._set_metadata(db_id, mi)
        cache._add_format(db_id, fmt, stream)

    return db_id
Beispiel #12
0
def get_series(title, authors, timeout=60):
    mi = Metadata(title, authors)
    if title and title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode):
        title = title.encode('utf-8')

    title = urllib.quote_plus(title)

    author = authors[0].strip()
    if not author:
        return mi
    if ',' in author:
        author = author.split(',')[0]
    else:
        author = author.split()[-1]

    url = URL.format(author, title)
    br = browser()
    try:
        raw = br.open_novisit(url, timeout=timeout).read()
    except URLError as e:
        if isinstance(e.reason, socket.timeout):
            raise Exception('KDL Server busy, try again later')
        raise
    if 'see the full results' not in raw:
        return mi
    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class':'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class':'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi
    href = a['href'].partition('?')[-1]
    data = urlparse.parse_qs(href)
    series = data.get('SeriesName', [])
    if not series:
        return mi
    series = series[0]
    series = re.sub(r' series$', '', series).strip()
    if series:
        mi.series = series
    ns = ss.nextSibling
    if ns.contents:
        raw = unicode(ns.contents[0])
        raw = raw.partition('.')[0].strip()
        try:
            mi.series_index = int(raw)
        except:
            pass
    return mi
Beispiel #13
0
    def test_legacy_adding_books(self):  # {{{
        'Test various adding books methods'
        from calibre.ebooks.metadata.book.base import Metadata
        legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
        mi = Metadata('Added Book0', authors=('Added Author',))
        with NamedTemporaryFile(suffix='.aff') as f:
            f.write(b'xxx')
            f.flush()
            T = partial(ET, 'add_books', ([f.name], ['AFF'], [mi]), old=old, legacy=legacy)
            T()(self)
            book_id = T(kwargs={'return_ids':True})(self)[1][0]
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
            T(kwargs={'add_duplicates':False})(self)
            mi.title = 'Added Book1'
            mi.uuid = 'uuu'
            T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            book_id = T(kwargs={'preserve_uuid':True})(self)
            self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))

            T = partial(ET, 'add_format', old=old, legacy=legacy)
            T((0, 'AFF', BytesIO(b'fffff')))(self)
            T((0, 'AFF', BytesIO(b'fffff')))(self)
            T((0, 'AFF', BytesIO(b'fffff')), {'replace':True})(self)
        with NamedTemporaryFile(suffix='.opf') as f:
            f.write(b'zzzz')
            f.flush()
            T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
            book_id = T()(self)
            self.assertFalse(legacy.new_api.formats(book_id))

        mi.title = 'Added Book2'
        T = partial(ET, 'create_book_entry', (mi,), old=old, legacy=legacy)
        T()
        T({'add_duplicates':False})
        T({'force_id':1000})

        with NamedTemporaryFile(suffix='.txt') as f:
            f.write(b'tttttt')
            f.seek(0)
            bid = legacy.add_catalog(f.name, 'My Catalog')
            self.assertEqual(old.add_catalog(f.name, 'My Catalog'), bid)
            cache = legacy.new_api
            self.assertEqual(cache.formats(bid), ('TXT',))
            self.assertEqual(cache.field_for('title', bid), 'My Catalog')
            self.assertEqual(cache.field_for('authors', bid), ('calibre',))
            self.assertEqual(cache.field_for('tags', bid), (_('Catalog'),))
            self.assertTrue(bid < legacy.add_catalog(f.name, 'Something else'))
            self.assertEqual(legacy.add_catalog(f.name, 'My Catalog'), bid)
            self.assertEqual(old.add_catalog(f.name, 'My Catalog'), bid)

            bid = legacy.add_news(f.name, {'title':'Events', 'add_title_tag':True, 'custom_tags':('one', 'two')})
            self.assertEqual(cache.formats(bid), ('TXT',))
            self.assertEqual(cache.field_for('authors', bid), ('calibre',))
            self.assertEqual(cache.field_for('tags', bid), (_('News'), 'Events', 'one', 'two'))

        old.close()
Beispiel #14
0
def metadata_from_dict(src):
    ans = Metadata('Unknown')
    for key, value in src.iteritems():
        if key == 'user_metadata':
            ans.set_all_user_metadata(value)
        else:
            setattr(ans, key, value)
    return ans
Beispiel #15
0
def read_metadata_kfx(stream, read_cover=True):
    ' Read the metadata.kfx file that is found in the sdr book folder for KFX files '
    c = Container(stream.read())
    m = extract_metadata(c.decode())
    # dump_metadata(m)

    def has(x):
        return m[x] and m[x][0]

    def get(x, single=True):
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ''
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(x):
        if tweaks['author_sort_copy_method'] != 'copy':
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + ' ' + m.group(1)
        return x

    mi = Metadata(title, [fix_author(x) for x in authors])
    if has('author'):
        mi.author_sort = get('author')
    if has('ASIN'):
        mi.set_identifier('mobi-asin', get('ASIN'))
    elif has('content_id'):
        mi.set_identifier('mobi-asin', get('content_id'))
    if has('languages'):
        langs = list(filter(None, (canonicalize_lang(x) for x in get('languages', False))))
        if langs:
            mi.languages = langs
    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher') and get('publisher') != 'Unknown':
        mi.publisher = get('publisher')
    if read_cover and m[COVER_KEY]:
        try:
            data = base64.standard_b64decode(m[COVER_KEY])
            w, h, fmt = identify_data(data)
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w and h:
            mi.cover_data = (fmt, data)

    return mi
Beispiel #16
0
def read_metadata_kfx(stream, read_cover=True):
    " Read the metadata.kfx file that is found in the sdr book folder for KFX files "
    c = Container(stream.read())
    m = extract_metadata(c.decode())
    # dump_metadata(m)

    def has(x):
        return m[x] and m[x][0]

    def get(x, single=True):
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ""
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get("title") or _("Unknown")
    authors = get("authors", False) or [_("Unknown")]
    auth_pat = re.compile(r"([^,]+?)\s*,\s+([^,]+)$")

    def fix_author(x):
        if tweaks["author_sort_copy_method"] != "copy":
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + " " + m.group(1)
        return x

    mi = Metadata(title, [fix_author(x) for x in authors])
    if has("author"):
        mi.author_sort = get("author")
    if has("ASIN"):
        mi.set_identifier("mobi-asin", get("ASIN"))
    elif has("content_id"):
        mi.set_identifier("mobi-asin", get("content_id"))
    if has("languages"):
        langs = list(filter(None, (canonicalize_lang(x) for x in get("languages", False))))
        if langs:
            mi.languages = langs
    if has("issue_date"):
        try:
            mi.pubdate = parse_only_date(get("issue_date"))
        except Exception:
            pass
    if has("publisher") and get("publisher") != "Unknown":
        mi.publisher = get("publisher")
    if read_cover and m[COVER_KEY]:
        try:
            data = base64.standard_b64decode(m[COVER_KEY])
            fmt, w, h = identify(bytes(data))
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
Beispiel #17
0
 def to_book_metadata(self):
     mi = Metadata(_('Unknown'))
     if self.db is None:
         return mi
     mi.set_all_user_metadata(self.db.field_metadata.custom_field_metadata())
     for widget in self.basic_metadata_widgets:
         widget.apply_to_metadata(mi)
     for widget in getattr(self, 'custom_metadata_widgets', []):
         widget.apply_to_metadata(mi)
     return mi
Beispiel #18
0
 def report_metadata_failure(self, group_id, details):
     a = self.report.append
     paths = self.file_groups[group_id]
     a(""), a("-" * 70)
     a(_("Failed to read metadata from the file(s):"))
     [a("\t" + f) for f in paths]
     a(_("With error:")), a(details)
     mi = Metadata(_("Unknown"))
     mi.read_metadata_failed = False
     return mi
Beispiel #19
0
 def report_metadata_failure(self, group_id, details):
     a = self.report.append
     paths = self.file_groups[group_id]
     a(''), a('-' * 70)
     a(_('Failed to read metadata from the file(s):'))
     [a('\t' + f) for f in paths]
     a(_('With error:')), a(details)
     mi = Metadata(_('Unknown'))
     mi.read_metadata_failed = False
     return mi
Beispiel #20
0
def create_cover(title, authors, series=None, series_index=1, prefs=None, as_qimage=False):
    ' Create a cover from the specified title, author and series. Any user set'
    ' templates are ignored, to ensure that the specified metadata is used. '
    mi = Metadata(title, authors)
    if series:
        mi.series, mi.series_index = series, series_index
    d = cprefs.defaults
    prefs = override_prefs(
        prefs or cprefs, title_template=d['title_template'], subtitle_template=d['subtitle_template'], footer_template=d['footer_template'])
    return generate_cover(mi, prefs=prefs, as_qimage=as_qimage)
Beispiel #21
0
    def get_baike_metadata(self, title):
        from baidubaike import Page

        try: baike = Page(title)
        except: return None

        info = baike.get_info()
        print "\n".join( "%s:\t%s" % v for v in info.items())
        mi = Metadata(info['title'])
        plat = info.get(u'首发网站', None)
        if not plat:
            plat = info.get(u'首发状态', "网络小说平台")
        plat = plat.replace(u'首发', '')
        mi.publisher = info.get(u'连载平台', plat)
        mi.authors   = [info[u'作者']]
        mi.isbn      = '0000000000001'
        mi.tags      = baike.get_tags()
        mi.pubdate   = datetime.datetime.now()
        mi.timestamp = datetime.datetime.now()
        mi.comments  = baike.get_summary()
        if u'完结' in info.get(u'连载状态', ""):
            day = re.findall('\d*-\d*-\d*', info[u'连载状态'])
            try: mi.pubdate = datetime.datetime.strptime(day[0], '%Y-%m-%d')
            except: pass
        return mi
Beispiel #22
0
def build_meta(log, issue_id):
    """Build metadata record based on comicvine issue_id"""
    issue = pycomicvine.Issue(
        issue_id,
        field_list=[
            "id",
            "name",
            "volume",
            "issue_number",
            "person_credits",
            "description",
            "store_date",
            "cover_date",
        ],
    )
    if not issue or not issue.volume:
        log.warn("Unable to load Issue(%d)" % issue_id)
        return None
    title = "%s #%s" % (issue.volume.name, issue.issue_number)
    if issue.name:
        title = title + ": %s" % (issue.name)
    authors = [p.name for p in issue.person_credits]
    meta = Metadata(title, authors)
    meta.series = issue.volume.name
    meta.series_index = str(issue.issue_number)
    meta.set_identifier("comicvine", str(issue.id))
    meta.comments = issue.description
    meta.has_cover = False
    if issue.volume.publisher:
        meta.publisher = issue.volume.publisher.name
    meta.pubdate = issue.store_date or issue.cover_date
    return meta
Beispiel #23
0
    def __init__(self):
        Metadata.__init__(self, '')

        self._new_book = False

        self.size = 0
        self.datetime = time.gmtime()
        self.path = '' #the quitthyme book storage_id
        self.thumbnail = None

        self.quietthyme_id = -1
Beispiel #24
0
 def test_rtf_metadata(self):
     stream = BytesIO(br'{\rtf1\ansi\ansicpg1252}')
     m = Metadata('Test ø̄title', ['Author One', 'Author БTwo'])
     m.tags = 'tag1 見tag2'.split()
     m.comments = '<p>some ⊹comments</p>'
     m.publisher = 'publiSher'
     set_metadata(stream, m)
     stream.seek(0)
     o = get_metadata(stream)
     for attr in 'title authors publisher comments tags'.split():
         self.assertEqual(getattr(m, attr), getattr(o, attr))
    def identify(self, 
                 log, result_queue, abort, 
                 title=None, authors=None,
                 identifiers={}, timeout=30):

        dlog("IDENTIFIERS")
        dlog(identifiers)

        dc = {}
        for Getter in CGetter.getter_list:
            dlog("USING GETTER: %s" % Getter)
            dlog("my prefs:", prefs)
            isvalid = True
            for pref_key, is_valid_check in Getter.ls_required_pref_key:
                dlog("validating: %s --> = %s" % (pref_key, prefs[pref_key]))
                if not is_valid_check(prefs[pref_key]):
                    dlog("INVALID: %s" % prefs[pref_key])
                    isvalid = False
                    break
            entry_id = identifiers.get(Getter.entry_identifier, None)
            dlog("---------------", entry_id)
            if entry_id and Getter.does_understand(entry_id):
                dc = Getter.resolve(entry_id)
                dlog("RESULT")
                dlog(dc)
                if dc['authors']: break

        dlog(dc)
        if not dc:
            dlog("not dc: returning")
            return None
        dlog("OK")
        
        mi = Metadata(dc["title"], dc["authors"])
        for attr in ("pubdate",
                     "publisher",
                     "issue",
                     "abstract",
                     "keywords",
                     "volume",
                     "pages",
                     ):
            if hasattr(mi, attr) and attr in dc:
                setattr(mi, attr, dc[attr])

        mi.print_all_attributes()
        self.clean_downloaded_metadata(mi)
        result_queue.put(mi)
        dlog("=" * 20)
        dlog(dir(self))
        dlog("=" * 20)

        return None
    def convert(self, oeb_book, output, input_plugin, opts, log):
        """Convert from calibre's internal format to KePub."""
        self.epub_output_plugin.convert(oeb_book, output, input_plugin, opts, log)
        container = KEPubContainer(output, default_log)

        if container.is_drm_encumbered:
            return

        # Write the details file
        o = {
            "kepub_output_version": ".".join([str(n) for n in self.version]),
            "kepub_output_currenttime": datetime.utcnow().ctime(),
        }
        kte_data_file = self.temporary_file("_KePubOutputPluginInfo")
        kte_data_file.write(json.dumps(o))
        kte_data_file.close()
        container.copy_file_to_container(
            kte_data_file.name, name="plugininfo.kte", mt="application/json"
        )

        title = container.opf_xpath("./opf:metadata/dc:title/text()")
        if len(title) > 0:
            title = title[0]
        else:
            title = NULL_VALUES["title"]
        authors = container.opf_xpath(
            './opf:metadata/dc:creator[@opf:role="aut"]/text()'
        )
        if len(authors) < 1:
            authors = NULL_VALUES["authors"]
        mi = Metadata(title, authors)
        language = container.opf_xpath("./opf:metadata/dc:language/text()")
        if len(language) > 0:
            mi.languages = language
            language = language[0]
        else:
            mi.languages = NULL_VALUES["languages"]
            language = NULL_VALUES["language"]
        mi.language

        modify_epub(
            container,
            output,
            metadata=mi,
            opts={
                "clean_markup": opts.kepub_clean_markup,
                "hyphenate": opts.kepub_hyphenate,
                "no-hyphens": opts.kepub_disable_hyphenation,
                "smarten_punctuation": False,
                "extended_kepub_features": True,
            },
        )
Beispiel #27
0
def read_metadata(root):
    ans = Metadata(_('Unknown'), [_('Unknown')])
    prefixes, refines = read_prefixes(root), read_refines(root)
    identifiers = read_identifiers(root, prefixes, refines)
    ids = {}
    for key, vals in identifiers.iteritems():
        if key == 'calibre':
            ans.application_id = vals[0]
        elif key != 'uuid':
            ids[key] = vals[0]
    ans.set_identifiers(ids)

    return ans
Beispiel #28
0
def generate_test_db(
    library_path,  # {{{
    num_of_records=20000,
    num_of_authors=6000,
    num_of_tags=10000,
    tag_length=7,
    author_length=7,
    title_length=10,
    max_authors=10,
    max_tags=10,
):
    import random, string, os, sys, time
    from calibre.constants import preferred_encoding

    if not os.path.exists(library_path):
        os.makedirs(library_path)

    letters = string.letters.decode(preferred_encoding)

    def randstr(length):
        return "".join(random.choice(letters) for i in xrange(length))

    all_tags = [randstr(tag_length) for j in xrange(num_of_tags)]
    print "Generated", num_of_tags, "tags"
    all_authors = [randstr(author_length) for j in xrange(num_of_authors)]
    print "Generated", num_of_authors, "authors"
    all_titles = [randstr(title_length) for j in xrange(num_of_records)]
    print "Generated", num_of_records, "titles"

    testdb = db(library_path)

    print "Creating", num_of_records, "records..."

    start = time.time()

    for i, title in enumerate(all_titles):
        print i + 1,
        sys.stdout.flush()
        authors = random.randint(1, max_authors)
        authors = [random.choice(all_authors) for i in xrange(authors)]
        tags = random.randint(0, max_tags)
        tags = [random.choice(all_tags) for i in xrange(tags)]
        from calibre.ebooks.metadata.book.base import Metadata

        mi = Metadata(title, authors)
        mi.tags = tags
        testdb.import_book(mi, [])

    t = time.time() - start
    print "\nGenerated", num_of_records, "records in:", t, "seconds"
    print "Time per record:", t / float(num_of_records)
Beispiel #29
0
    def parse(self, xml_detail):
        data = xml_detail.split('\n')[1].split("|")
        self.log(data)

        title = data[1]
        authors = [data[0]]
        comments = data[13]
        isbn = data[3]
        publisher = data[6]
        pub_date_tmp = data[34].split('-')
        pub_date = datetime.datetime(int(pub_date_tmp[0]), int(pub_date_tmp[1]), int(pub_date_tmp[2]), tzinfo=utc_tz)
        if isbn is not None:
            isbn_tmp = re.sub("-", "", isbn)
            cover = "%s/images/covers/%s.jpg"%(self.plugin.BASE_URL, isbn_tmp)
        else:
            cover = None

        if title is not None and authors is not None:
            mi = Metadata(title, authors)
            mi.languages = {'ces'}
            mi.comments = as_unicode(comments)
            mi.identifiers = {self.plugin.name:self.ident}
            mi.publisher = publisher
            mi.pubdate = pub_date
            mi.isbn = isbn
            mi.cover_url = cover

            if cover:
                self.plugin.cache_identifier_to_cover_url(self.ident, cover)

            return mi
        else:
            return None
Beispiel #30
0
    def convert(self, oeb_book, output, input_plugin, opts, log):
        self.epub_output_plugin.convert(oeb_book, output, input_plugin, opts,
                                        log)
        container = KEPubContainer(output, default_log)

        if container.is_drm_encumbered:
            return

        # Write the details file
        o = {
            'kepub_output_version': ".".join([str(n) for n in self.version]),
            'kepub_output_currenttime': datetime.utcnow().ctime()
        }
        kte_data_file = self.temporary_file('_KePubOutputPluginInfo')
        kte_data_file.write(json.dumps(o))
        kte_data_file.close()
        container.copy_file_to_container(kte_data_file.name,
                                         name='plugininfo.kte',
                                         mt='application/json')

        title = container.opf_xpath("./opf:metadata/dc:title/text()")
        if len(title) > 0:
            title = title[0]
        else:
            title = NULL_VALUES['title']
        authors = container.opf_xpath(
            './opf:metadata/dc:creator[@opf:role="aut"]/text()')
        if len(authors) < 1:
            authors = NULL_VALUES['authors']
        mi = Metadata(title, authors)
        language = container.opf_xpath("./opf:metadata/dc:language/text()")
        if len(language) > 0:
            mi.languages = language
            language = language[0]
        else:
            mi.languages = NULL_VALUES['languages']
            language = NULL_VALUES['language']
        mi.language

        modify_epub(container,
                    output,
                    metadata=mi,
                    opts={
                        'clean_markup': opts.kepub_clean_markup,
                        'hyphenate': opts.kepub_hyphenate,
                        'no-hyphens': opts.kepub_disable_hyphenation,
                        'replace_lang': opts.kepub_replace_lang,
                        'smarten_punctuation': False,
                        'extended_kepub_features': True
                    })
Beispiel #31
0
def to_metadata(browser, log, entry_, timeout):  # {{{
    from lxml import etree
    XPath = partial(etree.XPath, namespaces=NAMESPACES)

    # total_results  = XPath('//openSearch:totalResults')
    # start_index    = XPath('//openSearch:startIndex')
    # items_per_page = XPath('//openSearch:itemsPerPage')
    entry = XPath('//atom:entry')
    entry_id = XPath('descendant::atom:id')
    creator = XPath('descendant::dc:creator')
    identifier = XPath('descendant::dc:identifier')
    title = XPath('descendant::dc:title')
    date = XPath('descendant::dc:date')
    publisher = XPath('descendant::dc:publisher')
    subject = XPath('descendant::dc:subject')
    description = XPath('descendant::dc:description')
    language = XPath('descendant::dc:language')
    rating = XPath('descendant::gd:rating[@average]')

    def get_text(extra, x):
        try:
            ans = x(extra)
            if ans:
                ans = ans[0].text
                if ans and ans.strip():
                    return ans.strip()
        except:
            log.exception('Programming error:')
        return None

    id_url = entry_id(entry_)[0].text
    google_id = id_url.split('/')[-1]
    title_ = ': '.join([x.text for x in title(entry_)]).strip()
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
        authors = [_('Unknown')]
    if not id_url or not title:
        # Silently discard this entry
        return None

    mi = Metadata(title_, authors)
    mi.identifiers = {'google': google_id}
    try:
        raw = get_details(browser, id_url, timeout)
        feed = etree.fromstring(
            xml_to_unicode(clean_ascii_chars(raw),
                           strip_encoding_pats=True)[0])
        extra = entry(feed)[0]
    except:
        log.exception('Failed to get additional details for', mi.title)
        return mi

    mi.comments = get_text(extra, description)
    lang = canonicalize_lang(get_text(extra, language))
    if lang:
        mi.language = lang
    mi.publisher = get_text(extra, publisher)

    # ISBN
    isbns = []
    for x in identifier(extra):
        t = str(x.text).strip()
        if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
            if t[:5].upper() == 'ISBN:':
                t = check_isbn(t[5:])
                if t:
                    isbns.append(t)
    if isbns:
        mi.isbn = sorted(isbns, key=len)[-1]
    mi.all_isbns = isbns

    # Tags
    try:
        btags = [x.text for x in subject(extra) if x.text]
        tags = []
        for t in btags:
            atags = [y.strip() for y in t.split('/')]
            for tag in atags:
                if tag not in tags:
                    tags.append(tag)
    except:
        log.exception('Failed to parse tags:')
        tags = []
    if tags:
        mi.tags = [x.replace(',', ';') for x in tags]

    # pubdate
    pubdate = get_text(extra, date)
    if pubdate:
        from calibre.utils.date import parse_date, utcnow
        try:
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except:
            log.error('Failed to parse pubdate %r' % pubdate)

    # Ratings
    for x in rating(extra):
        try:
            mi.rating = float(x.get('average'))
            if mi.rating > 5:
                mi.rating /= 2
        except:
            log.exception('Failed to parse rating')

    # Cover
    mi.has_google_cover = None
    for x in extra.xpath(
            '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'
    ):
        mi.has_google_cover = x.get('href')
        break

    return mi
Beispiel #32
0
def metadata_from_xmp_packet(raw_bytes):
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title:
        if title.startswith(r'\376\377'):
            # corrupted XMP packet generated by Nitro PDF. See
            # https://bugs.launchpad.net/calibre/+bug/1541981
            raise ValueError(
                'Corrupted XMP metadata packet detected, probably generated by Nitro PDF'
            )
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = [au for aus in authors for au in string_to_authors(aus)]
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences(
        '//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(first_sequence('//dc:date', root)
                             or first_simple('//xmp:CreateDate', root),
                             assume_utc=False)
    except:
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    md = safe_parse_date(first_simple('//xmp:MetadataDate', root))
    mod = safe_parse_date(first_simple('//xmp:ModifyDate', root))
    fd = more_recent(md, mod)
    if fd is not None:
        mi.metadata_date = fd
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:' + x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except:
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = list(filter(None, map(canonicalize_lang, languages)))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple(f'//{namespace}:{scheme}', root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in iteritems({
            'doi': check_doi,
            'isbn': check_isbn
    }):
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                identifiers['doi'] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi
Beispiel #33
0
class Book(Book_):

    def __init__(self, prefix, lpath, title=None, authors=None, mime=None, date=None, ContentType=None,
                 thumbnail_name=None, size=None, other=None):
        from calibre.utils.date import parse_date
#         debug_print('Book::__init__ - title=', title)
        show_debug = title is not None and title.lower().find("xxxxx") >= 0
        if other is not None:
            other.title = title
            other.published_date = date
        if show_debug:
            debug_print("Book::__init__ - title=", title, 'authors=', authors)
            debug_print("Book::__init__ - other=", other)
        super(Book, self).__init__(prefix, lpath, size, other)

        if title is not None and len(title) > 0:
            self.title = title

        if authors is not None and len(authors) > 0:
            self.authors_from_string(authors)
            if self.author_sort is None or self.author_sort == "Unknown":
                self.author_sort = author_to_author_sort(authors)

        self.mime = mime

        self.size = size  # will be set later if None

        if ContentType == '6' and date is not None:
            try:
                self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
            except:
                try:
                    self.datetime = time.strptime(date.split('+')[0], "%Y-%m-%dT%H:%M:%S")
                except:
                    try:
                        self.datetime = time.strptime(date.split('+')[0], "%Y-%m-%d")
                    except:
                        try:
                            self.datetime = parse_date(date,
                                    assume_utc=True).timetuple()
                        except:
                            try:
                                self.datetime = time.gmtime(os.path.getctime(self.path))
                            except:
                                self.datetime = time.gmtime()

        self.kobo_metadata = Metadata(title, self.authors)
        self.contentID          = None
        self.current_shelves    = []
        self.kobo_collections   = []
        self.can_put_on_shelves = True
        self.kobo_series        = None
        self.kobo_series_number = None  # Kobo stores the series number as string. And it can have a leading "#".
        self.kobo_subtitle      = None

        if thumbnail_name is not None:
            self.thumbnail = ImageWrapper(thumbnail_name)

        if show_debug:
            debug_print("Book::__init__ end - self=", self)
            debug_print("Book::__init__ end - title=", title, 'authors=', authors)

    @property
    def is_sideloaded(self):
        # If we don't have a content Id, we don't know what type it is.
        return self.contentID and self.contentID.startswith("file")

    @property
    def is_purchased_kepub(self):
        return self.contentID and not self.contentID.startswith("file")

    def __unicode__(self):
        '''
        A string representation of this object, suitable for printing to
        console
        '''
        ans = [u"Kobo metadata:"]

        def fmt(x, y):
            ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))

        if self.contentID:
            fmt('Content ID', self.contentID)
        if self.kobo_series:
            fmt('Kobo Series', self.kobo_series + ' #%s'%self.kobo_series_number)
        if self.kobo_subtitle:
            fmt('Subtitle', self.kobo_subtitle)
        if self.mime:
            fmt('MimeType', self.mime)

        ans = u'\n'.join(ans) + u"\n" + self.kobo_metadata.__unicode__()

        return super(Book,self).__unicode__() + u"\n" + ans
Beispiel #34
0
    def parse_details(self, raw, root):
        asin = parse_asin(root, self.log, self.url)
        if self.testing:
            import tempfile, uuid
            with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4()))+ '_',
                    suffix='.html', delete=False) as f:
                f.write(raw)
            print ('Downloaded html for', asin, 'saved in', f.name)

        try:
            title = self.parse_title(root)
        except:
            self.log.exception('Error parsing title for url: %r'%self.url)
            title = None

        try:
            authors = self.parse_authors(root)
        except:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []

        if not title or not authors or not asin:
            self.log.error('Could not find title/authors/asin for %r'%self.url)
            self.log.error('ASIN: %r Title: %r Authors: %r'%(asin, title,
                authors))
            return

        mi = Metadata(title, authors)
        idtype = 'amazon' if self.domain == 'com' else 'amazon_'+self.domain
        mi.set_identifier(idtype, asin)
        self.amazon_id = asin

        try:
            mi.rating = self.parse_rating(root)
        except:
            self.log.exception('Error parsing ratings for url: %r'%self.url)

        try:
            mi.comments = self.parse_comments(root)
        except:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            series, series_index = self.parse_series(root)
            if series:
                mi.series, mi.series_index = series, series_index
            elif self.testing:
                mi.series, mi.series_index = 'Dummy series for testing', 1
        except:
            self.log.exception('Error parsing series for url: %r'%self.url)

        try:
            mi.tags = self.parse_tags(root)
        except:
            self.log.exception('Error parsing tags for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(root, raw)
        except:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        non_hero = tuple(self.selector('div#bookDetails_container_div div#nonHeroSection'))
        if non_hero:
            # New style markup
            try:
                self.parse_new_details(root, mi, non_hero[0])
            except:
                self.log.exception('Failed to parse new-style book details section')
        else:
            pd = root.xpath(self.pd_xpath)
            if pd:
                pd = pd[0]

                try:
                    isbn = self.parse_isbn(pd)
                    if isbn:
                        self.isbn = mi.isbn = isbn
                except:
                    self.log.exception('Error parsing ISBN for url: %r'%self.url)

                try:
                    mi.publisher = self.parse_publisher(pd)
                except:
                    self.log.exception('Error parsing publisher for url: %r'%self.url)

                try:
                    mi.pubdate = self.parse_pubdate(pd)
                except:
                    self.log.exception('Error parsing publish date for url: %r'%self.url)

                try:
                    lang = self.parse_language(pd)
                    if lang:
                        mi.language = lang
                except:
                    self.log.exception('Error parsing language for url: %r'%self.url)

            else:
                self.log.warning('Failed to find product description for url: %r'%self.url)

        mi.source_relevance = self.relevance

        if self.amazon_id:
            if self.isbn:
                self.plugin.cache_isbn_to_identifier(self.isbn, self.amazon_id)
            if self.cover_url:
                self.plugin.cache_identifier_to_cover_url(self.amazon_id,
                        self.cover_url)

        self.plugin.clean_downloaded_metadata(mi)

        self.result_queue.put(mi)
Beispiel #35
0
def identify(
        log,
        abort,  # {{{
        title=None,
        authors=None,
        identifiers={},
        timeout=30):
    if title == _('Unknown'):
        title = None
    if authors == [_('Unknown')]:
        authors = None
    start_time = time.time()
    plugins = [p for p in metadata_plugins(['identify']) if p.is_configured()]

    kwargs = {
        'title': title,
        'authors': authors,
        'identifiers': identifiers,
        'timeout': timeout,
    }

    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join([p.name for p in plugins]))
    log('The log from individual plugins is below')

    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    results = {}
    for p in plugins:
        results[p] = []
    logs = dict([(w.plugin, w.buf) for w in workers])

    def get_results():
        found = False
        for w in workers:
            try:
                result = w.rq.get_nowait()
            except Empty:
                pass
            else:
                results[w.plugin].append(result)
                found = True
        return found

    wait_time = msprefs['wait_after_first_identify_result']
    while True:
        time.sleep(0.2)

        if get_results() and first_result_at is None:
            first_result_at = time.time()

        if not is_worker_alive(workers):
            break

        if (first_result_at is not None
                and time.time() - first_result_at > wait_time):
            log.warn('Not waiting any longer for more results. Still running'
                     ' sources:')
            for worker in workers:
                if worker.is_alive():
                    log.debug('\t' + worker.name)
            abort.set()
            break

    while not abort.is_set() and get_results():
        pass

    sort_kwargs = dict(kwargs)
    for k in list(sort_kwargs.iterkeys()):
        if k not in ('title', 'authors', 'identifiers'):
            sort_kwargs.pop(k)

    longest, lp = -1, ''
    for plugin, presults in results.iteritems():
        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))

        # Throw away lower priority results from the same source that have exactly the same
        # title and authors as a higher priority result
        filter_results = set()
        filtered_results = []
        for r in presults:
            key = (r.title, tuple(r.authors))
            if key not in filter_results:
                filtered_results.append(r)
                filter_results.add(key)
        results[plugin] = presults = filtered_results

        plog = logs[plugin].getvalue().strip()
        log('\n' + '*' * 30, plugin.name, '*' * 30)
        log('Request extra headers:', plugin.browser.addheaders)
        log('Found %d results' % len(presults))
        time_spent = getattr(plugin, 'dl_time_spent', None)
        if time_spent is None:
            log('Downloading was aborted')
            longest, lp = -1, plugin.name
        else:
            log('Downloading from', plugin.name, 'took', time_spent)
            if time_spent > longest:
                longest, lp = time_spent, plugin.name
        for r in presults:
            log('\n\n---')
            log(unicode(r))
        if plog:
            log(plog)
        log('\n' + '*' * 80)

        dummy = Metadata(_('Unknown'))
        for i, result in enumerate(presults):
            for f in plugin.prefs['ignore_fields']:
                if ':' not in f:
                    setattr(result, f, getattr(dummy, f))
                if f == 'series':
                    result.series_index = dummy.series_index
            result.relevance_in_source = i
            result.has_cached_cover_url = (plugin.cached_cover_url_is_reliable
                                           and plugin.get_cached_cover_url(
                                               result.identifiers) is not None)
            result.identify_plugin = plugin
            if msprefs['txt_comments']:
                if plugin.has_html_comments and result.comments:
                    result.comments = html2text(result.comments)

    log('The identify phase took %.2f seconds' % (time.time() - start_time))
    log('The longest time (%f) was taken by:' % longest, lp)
    log('Merging results from different sources and finding earliest ',
        'publication dates from the worldcat.org service')
    start_time = time.time()
    results = merge_identify_results(results, log)

    log('We have %d merged results, merging took: %.2f seconds' %
        (len(results), time.time() - start_time))

    max_tags = msprefs['max_tags']
    for r in results:
        r.tags = r.tags[:max_tags]
        if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
            r.pubdate = None

    if msprefs['swap_author_names']:
        for r in results:

            def swap_to_ln_fn(a):
                if ',' in a:
                    return a
                parts = a.split(None)
                if len(parts) <= 1:
                    return a
                surname = parts[-1]
                return '%s, %s' % (surname, ' '.join(parts[:-1]))

            r.authors = [swap_to_ln_fn(a) for a in r.authors]

    return results
Beispiel #36
0
    def _GoodreadsBook_to_Metadata(self, book):
        # type: (_GoodreadsBook) -> Metadata
        """
        :param book: _GoodreadsBook: book
        :return: Metadata: Metadata
        """
        mi = Metadata(book.title, book.authors)
        mi.source_relevance = 0
        mi.set_identifier('goodreads', book.id)

        if self.prefs['NEVER_REPLACE_ISBN'] and mi.get_identifiers().get(
                'isbn'):
            mi.set_identifier('isbn', '')

        if book.asin and not self.prefs['NEVER_REPLACE_AMAZONID']:
            mi.set_identifier('amazon', book.asin)

        if book.isbn and not self.prefs['NEVER_REPLACE_ISBN']:
            try:
                if len(book.isbn) == 10:
                    mi.isbn = check_isbn13(_ISBNConvert.convert(book.isbn))
                else:
                    mi.isbn = check_isbn13(book.isbn)
            except:
                self.log.error("ISBN CONVERSION ERROR:", book.isbn)
                self.log.exception()

        if book.image_url:
            self.log.info('cache_identifier_to_cover_url:', book.asin, ':',
                          book.image_url)
            self.cache_identifier_to_cover_url(book.id, book.image_url)

        if book.publisher:
            self.log.info('book.publisher is:', book.publisher)
            mi.publisher = book.publisher

        if book.pubdate:
            self.log.info('book.pubdate is:',
                          book.pubdate.strftime('%Y-%m-%d'))
            mi.pubdate = book.pubdate

        if book.comments:
            self.log.info('book.editorial_review is:', book.comments)
            mi.comments = book.comments

        tags = self.prefs['ADD_THESE_TAGS'].split(',')
        tags.extend(book.tags)
        # tag_mappings = JSONConfig('plugins/GenreMappings')['genreMappings']
        # mi.tags = list(set(sorted(filter(lambda x: tag_mappings.get(x, x), tags))))

        if book.series:
            mi.series = book.series
            self.log.info(u'series:', book.series)
            if book.series_index:
                mi.series_index = book.series_index
                self.log.info(u'series_index:',
                              "{0:.2f}".format(book.series_index))
            else:
                mi.series_index = 0

        if book.average_rating:
            mi.rating = book.average_rating

        self.clean_downloaded_metadata(mi)

        return mi
Beispiel #37
0
 def validate(self, x):
     from calibre.ebooks.metadata.book.base import Metadata
     self.book = Metadata('')
     return self.vformat(x, [], {})
Beispiel #38
0
    def test_legacy_direct(self):  # {{{
        'Test read-only methods that are directly equivalent in the old and new interface'
        from calibre.ebooks.metadata.book.base import Metadata
        from datetime import timedelta
        ndb = self.init_legacy(self.cloned_library)
        db = self.init_old()
        newstag = ndb.new_api.get_item_id('tags', 'news')

        self.assertEqual(dict(db.prefs), dict(ndb.prefs))

        for meth, args in {
                'find_identical_books': [(Metadata('title one',
                                                   ['author one']), ),
                                         (Metadata('unknown'), ),
                                         (Metadata('xxxx'), )],
                'get_books_for_category': [('tags', newstag),
                                           ('#formats', 'FMT1')],
                'get_next_series_num_for': [('A Series One', )],
                'get_id_from_uuid': [('ddddd', ), (db.uuid(1, True), )],
                'cover': [(0, ), (1, ), (2, )],
                'get_author_id': [('author one', ), ('unknown', ),
                                  ('xxxxx', )],
                'series_id': [(0, ), (1, ), (2, )],
                'publisher_id': [(0, ), (1, ), (2, )],
                '@tags_older_than': [
                    ('News', None),
                    ('Tag One', None),
                    ('xxxx', None),
                    ('Tag One', None, 'News'),
                    ('News', None, 'xxxx'),
                    ('News', None, None, ['xxxxxxx']),
                    ('News', None, 'Tag One', ['Author Two', 'Author One']),
                    ('News', timedelta(0), None, None),
                    ('News', timedelta(100000)),
                ],
                'format': [(1, 'FMT1', True), (2, 'FMT1', True),
                           (0, 'xxxxxx')],
                'has_format': [(1, 'FMT1', True), (2, 'FMT1', True),
                               (0, 'xxxxxx')],
                'sizeof_format': [(1, 'FMT1', True), (2, 'FMT1', True),
                                  (0, 'xxxxxx')],
                '@format_files': [(0, ), (1, ), (2, )],
                'formats': [(0, ), (1, ), (2, )],
                'max_size': [(0, ), (1, ), (2, )],
                'format_hash': [(1, 'FMT1'), (1, 'FMT2'), (2, 'FMT1')],
                'author_sort_from_authors':
            [(['Author One', 'Author Two', 'Unknown'], )],
                'has_book': [(Metadata('title one'), ),
                             (Metadata('xxxx1111'), )],
                'has_id': [(1, ), (2, ), (3, ), (9999, )],
                'id': [
                    (1, ),
                    (2, ),
                    (0, ),
                ],
                'index': [
                    (1, ),
                    (2, ),
                    (3, ),
                ],
                'row': [
                    (1, ),
                    (2, ),
                    (3, ),
                ],
                'is_empty': [()],
                'count': [()],
                'all_author_names': [()],
                'all_tag_names': [()],
                'all_series_names': [()],
                'all_publisher_names': [()],
                '!all_authors': [()],
                '!all_tags2': [()],
                '@all_tags': [()],
                '@get_all_identifier_types': [()],
                '!all_publishers': [()],
                '!all_titles': [()],
                '!all_series': [()],
                'standard_field_keys': [()],
                'all_field_keys': [()],
                'searchable_fields': [()],
                'search_term_to_field_key': [('author', ), ('tag', )],
                'metadata_for_field': [('title', ), ('tags', )],
                'sortable_field_keys': [()],
                'custom_field_keys': [(True, ), (False, )],
                '!get_usage_count_by_id': [('authors', ), ('tags', ),
                                           ('series', ), ('publisher', ),
                                           ('#tags', ), ('languages', )],
                'get_field': [(1, 'title'), (2, 'tags'), (0, 'rating'),
                              (1, 'authors'), (2, 'series'), (1, '#tags')],
                'all_formats': [()],
                'get_authors_with_ids': [()],
                '!get_tags_with_ids': [()],
                '!get_series_with_ids': [()],
                '!get_publishers_with_ids': [()],
                '!get_ratings_with_ids': [()],
                '!get_languages_with_ids': [()],
                'tag_name': [(3, )],
                'author_name': [(3, )],
                'series_name': [(3, )],
                'authors_sort_strings': [(0, ), (1, ), (2, )],
                'author_sort_from_book': [(0, ), (1, ), (2, )],
                'authors_with_sort_strings': [(0, ), (1, ), (2, )],
                'book_on_device_string': [(1, ), (2, ), (3, )],
                'books_in_series_of': [(0, ), (1, ), (2, )],
                'books_with_same_title': [(Metadata(db.title(0)), ),
                                          (Metadata(db.title(1)), ),
                                          (Metadata('1234'), )],
        }.iteritems():
            fmt = lambda x: x
            if meth[0] in {'!', '@'}:
                fmt = {'!': dict, '@': frozenset}[meth[0]]
                meth = meth[1:]
            elif meth == 'get_authors_with_ids':
                fmt = lambda val: {x[0]: tuple(x[1:]) for x in val}
            for a in args:
                self.assertEqual(
                    fmt(getattr(db, meth)(*a)), fmt(getattr(ndb, meth)(*a)),
                    'The method: %s() returned different results for argument %s'
                    % (meth, a))

        def f(
            x, y
        ):  # get_top_level_move_items is broken in the old db on case-insensitive file systems
            x.discard('metadata_db_prefs_backup.json')
            return x, y

        self.assertEqual(f(*db.get_top_level_move_items()),
                         f(*ndb.get_top_level_move_items()))
        d1, d2 = BytesIO(), BytesIO()
        db.copy_cover_to(1, d1, True)
        ndb.copy_cover_to(1, d2, True)
        self.assertTrue(d1.getvalue() == d2.getvalue())
        d1, d2 = BytesIO(), BytesIO()
        db.copy_format_to(1, 'FMT1', d1, True)
        ndb.copy_format_to(1, 'FMT1', d2, True)
        self.assertTrue(d1.getvalue() == d2.getvalue())
        old = db.get_data_as_dict(prefix='test-prefix')
        new = ndb.get_data_as_dict(prefix='test-prefix')
        for o, n in zip(old, new):
            o = {
                type('')(k) if isinstance(k, bytes) else k:
                set(v) if isinstance(v, list) else v
                for k, v in o.iteritems()
            }
            n = {
                k: set(v) if isinstance(v, list) else v
                for k, v in n.iteritems()
            }
            self.assertEqual(o, n)

        ndb.search('title:Unknown')
        db.search('title:Unknown')
        self.assertEqual(db.row(3), ndb.row(3))
        self.assertRaises(ValueError, ndb.row, 2)
        self.assertRaises(ValueError, db.row, 2)
        db.close()
Beispiel #39
0
 def reset_info(self):
     self.show_data(Metadata(_('Unknown')))
Beispiel #40
0
def read_metadata_kfx(stream, read_cover=True):
    ' Read the metadata.kfx file that is found in the sdr book folder for KFX files '
    c = Container(stream.read())
    m = extract_metadata(c.decode())

    # dump_metadata(m)

    def has(x):
        return m[x] and m[x][0]

    def get(x, single=True):
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ''
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get('title') or _('Unknown')
    authors = get('author', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(x):
        if tweaks['author_sort_copy_method'] != 'copy':
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + ' ' + m.group(1)
        return x

    unique_authors = []  # remove duplicates while retaining order
    for f in [fix_author(x) for x in authors]:
        if f not in unique_authors:
            unique_authors.append(f)

    mi = Metadata(title, unique_authors)
    if has('author'):
        mi.author_sort = get('author')
    if has('ASIN'):
        mi.set_identifier('mobi-asin', get('ASIN'))
    elif has('content_id'):
        mi.set_identifier('mobi-asin', get('content_id'))
    if has('languages'):
        langs = list(
            filter(None,
                   (canonicalize_lang(x) for x in get('languages', False))))
        if langs:
            mi.languages = langs
    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher') and get('publisher') != 'Unknown':
        mi.publisher = get('publisher')
    if read_cover and m[COVER_KEY]:
        try:
            data = from_base64_bytes(m[COVER_KEY])
            fmt, w, h = identify(data)
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
Beispiel #41
0
 def mi(self):
     mi = Metadata(unicode(self.title.text()).strip() or _('Unknown'))
     mi.authors = string_to_authors(unicode(
         self.authors.text()).strip()) or [_('Unknown')]
     mi.languages = self.languages.lang_codes or [get_lang()]
     return mi
Beispiel #42
0
def to_metadata(log,gmetadata,ExHentai_Status): # {{{
    title = gmetadata['title']
    title_jpn = gmetadata['title_jpn']
    tags = gmetadata['tags']
    rating = gmetadata['rating']
    category = gmetadata['category']
    gid = gmetadata['gid']
    token = gmetadata['token']
    thumb = gmetadata['thumb']
    
    # title
    if title_jpn:
        raw_title = title_jpn
    else:
        raw_title = title
    pat1 = re.compile(r'(?P<comments>.*?\[(?P<author>(?:(?!汉化|漢化)[^\[\]])*)\](?:\s*(?:\[[^\(\)]+\]|\([^\[\]\(\)]+\))\s*)*(?P<title>[^\[\]\(\)]+).*)')
    if re.findall(pat1,raw_title):
        m = re.search(pat1, raw_title)
        title_ = m.group('title').strip()
        author = m.group('author').strip()
    else:
        title_ = raw_title.strip()
        author = 'Unknown'
        log.exception('Title match failed. Title is %s' % raw_title)
    
    authors = [(author)]
        
    mi = Metadata(title_, authors)
    mi.identifiers = {'ehentai':'%s_%s_%d' % (str(gid),str(token),int(ExHentai_Status))}
    
    # publisher
    pat2 = re.compile(r'^\(([^\[\]\(\)]*)\)')
    if re.findall(pat2, raw_title):
        publisher = re.search(pat2, raw_title).group(1).strip()
        mi.publisher = publisher
    else:
        mi.publisher = 'Unknown'
        log.exception('Not Found publisher.')

    # Tags
    tags_ = []
    for tag in tags:
        if re.match('language',tag):
            tag_ = re.sub('language:','',tag)
            if tag_ != 'translated':
                mi.language = tag_
            else:
                tags_.append(tag_)
#         elif re.match('parody|group|character|artist', tag):
#             log('drop tag %s' % tag)
#             continue
        elif not ':' in tag:
            log('drop tag %s' % tag)
            continue
        else:
            tags_.append(tag)
    tags_.append(category)
    mi.tags = tags_
    
    # rating
    mi.rating = float(rating)
    
    # cover
    mi.has_ehentai_cover = None
    if thumb:
        mi.has_ehentai_cover = thumb
    return mi
Beispiel #43
0
def to_metadata(browser, log, entry_, timeout):  # {{{
    from lxml import etree
    from calibre.ebooks.chardet import xml_to_unicode
    from calibre.utils.date import parse_date, utcnow
    from calibre.utils.cleantext import clean_ascii_chars

    XPath = partial(etree.XPath, namespaces=NAMESPACES)
    entry = XPath('//atom:entry')
    entry_id = XPath('descendant::atom:id')
    title = XPath('descendant::atom:title')
    description = XPath('descendant::atom:summary')
    publisher = XPath("descendant::db:attribute[@name='publisher']")
    isbn = XPath("descendant::db:attribute[@name='isbn13']")
    date = XPath("descendant::db:attribute[@name='pubdate']")
    creator = XPath("descendant::db:attribute[@name='author']")
    booktag = XPath("descendant::db:tag/attribute::name")
    rating = XPath("descendant::gd:rating/attribute::average")
    cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")

    def get_text(extra, x):
        try:
            ans = x(extra)
            if ans:
                ans = ans[0].text
                if ans and ans.strip():
                    return ans.strip()
        except:
            log.exception('Programming error:')
        return None

    id_url = entry_id(entry_)[0].text
    douban_id = id_url.split('/')[-1]
    title_ = ': '.join([x.text for x in title(entry_)]).strip()
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
        authors = [_('Unknown')]
    if not id_url or not title:
        # Silently discard this entry
        return None

    mi = Metadata(title_, authors)
    mi.identifiers = {'douban': douban_id}
    try:
        raw = get_details(browser, id_url, timeout)
        feed = etree.fromstring(
            xml_to_unicode(clean_ascii_chars(raw),
                           strip_encoding_pats=True)[0])
        extra = entry(feed)[0]
    except:
        log.exception('Failed to get additional details for', mi.title)
        return mi
    mi.comments = get_text(extra, description)
    mi.publisher = get_text(extra, publisher)

    # ISBN
    isbns = []
    for x in [t.text for t in isbn(extra)]:
        if check_isbn(x):
            isbns.append(x)
    if isbns:
        mi.isbn = sorted(isbns, key=len)[-1]
    mi.all_isbns = isbns

    # Tags
    try:
        btags = [x for x in booktag(extra) if x]
        tags = []
        for t in btags:
            atags = [y.strip() for y in t.split('/')]
            for tag in atags:
                if tag not in tags:
                    tags.append(tag)
    except:
        log.exception('Failed to parse tags:')
        tags = []
    if tags:
        mi.tags = [x.replace(',', ';') for x in tags]

    # pubdate
    pubdate = get_text(extra, date)
    if pubdate:
        try:
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except:
            log.error('Failed to parse pubdate %r' % pubdate)

    # Ratings
    if rating(extra):
        try:
            mi.rating = float(rating(extra)[0]) / 2.0
        except:
            log.exception('Failed to parse rating')
            mi.rating = 0

    # Cover
    mi.has_douban_cover = None
    u = cover_url(extra)
    if u:
        u = u[0].replace('/spic/', '/lpic/')
        # If URL contains "book-default", the book doesn't have a cover
        if u.find('book-default') == -1:
            mi.has_douban_cover = u
    return mi
Beispiel #44
0
    def _metadata(self, baike):
        from calibre.ebooks.metadata.book.base import Metadata
        from cStringIO import StringIO

        info = baike.get_info()
        print "\n".join("%s:\t%s" % v for v in info.items())

        mi = Metadata(info['title'])
        plat = "網絡小說平台"
        plat = info.get(u'首發狀態', plat)
        plat = info.get(u'首發網站', plat)
        plat = plat.replace(u'首發', '')
        mi.publisher = info.get(u'連載平台', plat)
        mi.authors = [info.get(u'作者', u'佚名')]
        mi.author_sort = mi.authors[0]
        mi.isbn = BAIKE_ISBN
        mi.tags = baike.get_tags()
        mi.pubdate = datetime.datetime.now()
        mi.timestamp = datetime.datetime.now()
        mi.cover_url = baike.get_image()
        mi.comments = re.sub(r'\[\d+\]$', "", baike.get_summary())
        mi.website = baike.http.url
        mi.source = u'百度百科'

        if self.copy_image:
            img = StringIO(urlopen(mi.cover_url).read())
            img_fmt = mi.cover_url.split(".")[-1]
            mi.cover_data = (img_fmt, img)

        if u'完結' in info.get(u'連載狀態', ""):
            day = re.findall('\d*-\d*-\d*', info[u'連載狀態'])
            try:
                mi.pubdate = datetime.datetime.strptime(day[0], '%Y-%m-%d')
            except:
                pass
        return mi
Beispiel #45
0
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name,
                                                                    ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name,
                          DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype',
                        b'application/epub+zip',
                        compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)


if __name__ == '__main__':
    from calibre.ebooks.metadata.book.base import Metadata
    mi = Metadata('Test book', authors=('Kovid Goyal', ))
    path = sys.argv[-1]
    ext = path.rpartition('.')[-1].lower()
    if ext not in valid_empty_formats:
        print(('Unsupported format:', ext))
        raise SystemExit(1)
    create_book(mi, path, fmt=ext)
Beispiel #46
0
def build_meta(log, issue_id):
  '''Build metadata record based on comicvine issue_id'''
  issue = pycomicvine.Issue(issue_id, field_list=[
      'id', 'name', 'volume', 'issue_number', 'person_credits', 'description', 
      'store_date', 'cover_date'])
  if not issue or not issue.volume:
    log.warn('Unable to load Issue(%d)' % issue_id)
    return None
  title = '%s #%s' %  (issue.volume.name, issue.issue_number)
  if issue.name:
    title = title + ': %s' % (issue.name)
  authors = [p.name for p in issue.person_credits]
  meta = Metadata(title, authors)
  meta.series = issue.volume.name
  meta.series_index = str(issue.issue_number)
  meta.set_identifier('comicvine', str(issue.id))
  meta.set_identifier('comicvine-volume', str(issue.volume.id))
  meta.comments = issue.description
  meta.has_cover = False
  if issue.volume.publisher:
    meta.publisher = issue.volume.publisher.name
  meta.pubdate = issue.store_date or issue.cover_date
  return meta
Beispiel #47
0
    def parse(self, raw):
        from calibre.ebooks.metadata.book.base import Metadata
        from calibre.utils.date import parse_only_date, UNDEFINED_DATE
        from css_selectors import Select
        root = parse_html(raw)
        selector = Select(root)
        sku = next(selector('div.sku.attGroup'))
        info = sku.getparent()
        top = info.getparent().getparent()
        banner = top.find('div')
        spans = banner.findall('span')
        title = ''
        for i, span in enumerate(spans):
            if i == 0 or '12pt' in span.get('style', ''):
                title += astext(span)
            else:
                break
        authors = [re.sub(r'\(.*\)', '', x).strip() for x in astext(spans[-1]).split(',')]
        mi = Metadata(title.strip(), authors)

        # Identifiers
        isbns = [check_isbn(x.strip()) for x in astext(sku).split(',')]
        for isbn in isbns:
            if isbn:
                self.plugin.cache_isbn_to_identifier(isbn, self.sku)
        isbns = sorted(isbns, key=lambda x:len(x) if x else 0, reverse=True)
        if isbns and isbns[0]:
            mi.isbn = isbns[0]
        mi.set_identifier('edelweiss', self.sku)

        # Tags
        bisac = tuple(selector('div.bisac.attGroup'))
        if bisac:
            bisac = astext(bisac[0])
            mi.tags = [x.strip() for x in bisac.split(',')]
            mi.tags = [t[1:].strip() if t.startswith('&') else t for t in mi.tags]

        # Publisher
        pub = tuple(selector('div.supplier.attGroup'))
        if pub:
            pub = astext(pub[0])
            mi.publisher = pub

        # Pubdate
        pub = tuple(selector('div.shipDate.attGroupItem'))
        if pub:
            pub = astext(pub[0])
            parts = pub.partition(':')[0::2]
            pub = parts[1] or parts[0]
            try:
                if ', Ship Date:' in pub:
                    pub = pub.partition(', Ship Date:')[0]
                q = parse_only_date(pub, assume_utc=True)
                if q.year != UNDEFINED_DATE:
                    mi.pubdate = q
            except:
                self.log.exception('Error parsing published date: %r'%pub)

        # Comments
        comm = ''
        general = tuple(selector('div#pd-general-overview-content'))
        if general:
            q = self.render_comments(general[0])
            if q != '<p>No title summary available. </p>':
                comm += q
        general = tuple(selector('div#pd-general-contributor-content'))
        if general:
            comm += self.render_comments(general[0])
        general = tuple(selector('div#pd-general-quotes-content'))
        if general:
            comm += self.render_comments(general[0])
        if comm:
            mi.comments = comm

        # Cover
        img = tuple(selector('img.title-image[src]'))
        if img:
            href = img[0].get('src').replace('jacket_covers/medium/',
                                             'jacket_covers/flyout/')
            self.plugin.cache_identifier_to_cover_url(self.sku, href)

        mi.has_cover = self.plugin.cached_identifier_to_cover_url(self.sku) is not None

        return mi
Beispiel #48
0
def get_metadata_(src, encoding=None):
    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    if isbytestring(src):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, 'replace')
    src = src[:150000]  # Searching shouldn't take too long
    comment_tags = parse_comment_tags(src)
    meta_tags = parse_meta_tags(src)

    def get(field):
        ans = comment_tags.get(field, meta_tags.get(field, None))
        if ans:
            ans = ans.strip()
        if not ans:
            ans = None
        return ans

    # Title
    title = get('title')
    if not title:
        pat = re.compile('<title>([^<>]+?)</title>', re.IGNORECASE)
        match = pat.search(src)
        if match:
            title = replace_entities(match.group(1))

    # Author
    authors = get('authors') or _('Unknown')

    # Create MetaInformation with Title and Author
    mi = Metadata(title or _('Unknown'), string_to_authors(authors))

    for field in ('publisher', 'isbn', 'language', 'comments'):
        val = get(field)
        if val:
            setattr(mi, field, val)

    for field in ('pubdate', 'timestamp'):
        try:
            val = parse_date(get(field))
        except:
            pass
        else:
            if not is_date_undefined(val):
                setattr(mi, field, val)

    # SERIES
    series = get('series')
    if series:
        pat = re.compile(r'\[([.0-9]+)\]$')
        match = pat.search(series)
        series_index = None
        if match is not None:
            try:
                series_index = float(match.group(1))
            except:
                pass
            series = series.replace(match.group(), '').strip()
        mi.series = series
        if series_index is None:
            series_index = get('series_index')
            try:
                series_index = float(series_index)
            except:
                pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = get('rating')
    if rating:
        try:
            mi.rating = float(rating)
            if mi.rating < 0:
                mi.rating = 0
            if mi.rating > 5:
                mi.rating /= 2.
            if mi.rating > 5:
                mi.rating = 0
        except:
            pass

    # TAGS
    tags = get('tags')
    if tags:
        tags = [x.strip() for x in tags.split(',') if x.strip()]
        if tags:
            mi.tags = tags

    return mi
Beispiel #49
0
def read_metadata(root, ver=None, return_extra_data=False):
    ans = Metadata(_('Unknown'), [_('Unknown')])
    prefixes, refines = read_prefixes(root), read_refines(root)
    identifiers = read_identifiers(root, prefixes, refines)
    ids = {}
    for key, vals in iteritems(identifiers):
        if key == 'calibre':
            ans.application_id = vals[0]
        elif key == 'uuid':
            ans.uuid = vals[0]
        else:
            ids[key] = vals[0]
    ans.set_identifiers(ids)
    ans.title = read_title(root, prefixes, refines) or ans.title
    ans.title_sort = read_title_sort(root, prefixes, refines) or ans.title_sort
    ans.languages = read_languages(root, prefixes, refines) or ans.languages
    auts, aus = [], []
    for a in read_authors(root, prefixes, refines):
        auts.append(a.name), aus.append(a.sort)
    ans.authors = auts or ans.authors
    ans.author_sort = authors_to_string(aus) or ans.author_sort
    bkp = read_book_producers(root, prefixes, refines)
    if bkp:
        if bkp[0]:
            ans.book_producer = bkp[0]
    pd = read_pubdate(root, prefixes, refines)
    if not is_date_undefined(pd):
        ans.pubdate = pd
    ts = read_timestamp(root, prefixes, refines)
    if not is_date_undefined(ts):
        ans.timestamp = ts
    lm = read_last_modified(root, prefixes, refines)
    if not is_date_undefined(lm):
        ans.last_modified = lm
    ans.comments = read_comments(root, prefixes, refines) or ans.comments
    ans.publisher = read_publisher(root, prefixes, refines) or ans.publisher
    ans.tags = read_tags(root, prefixes, refines) or ans.tags
    ans.rating = read_rating(root, prefixes, refines) or ans.rating
    s, si = read_series(root, prefixes, refines)
    if s:
        ans.series, ans.series_index = s, si
    ans.author_link_map = read_author_link_map(root, prefixes,
                                               refines) or ans.author_link_map
    ans.user_categories = read_user_categories(root, prefixes,
                                               refines) or ans.user_categories
    for name, fm in iteritems((read_user_metadata(root, prefixes, refines)
                               or {})):
        ans.set_user_metadata(name, fm)
    if return_extra_data:
        ans = ans, ver, read_raster_cover(root, prefixes,
                                          refines), first_spine_item(
                                              root, prefixes, refines)
    return ans
Beispiel #50
0
    def merge(self, results, min_year, do_asr=True):
        ans = Metadata(_('Unknown'))

        # We assume the shortest title has the least cruft in it
        ans.title = self.length_merge('title', results, null_value=ans.title)

        # No harm in having extra authors, maybe something useful like an
        # editor or translator
        ans.authors = self.length_merge('authors',
                                        results,
                                        null_value=ans.authors,
                                        shortest=False)

        # We assume the shortest publisher has the least cruft in it
        ans.publisher = self.length_merge('publisher',
                                          results,
                                          null_value=ans.publisher)

        # We assume the smallest set of tags has the least cruft in it
        ans.tags = self.length_merge('tags',
                                     results,
                                     null_value=ans.tags,
                                     shortest=msprefs['fewer_tags'])

        # We assume the longest series has the most info in it
        ans.series = self.length_merge('series',
                                       results,
                                       null_value=ans.series,
                                       shortest=False)
        for r in results:
            if r.series and r.series == ans.series:
                ans.series_index = r.series_index
                break

        # Average the rating over all sources
        ratings = []
        for r in results:
            rating = r.rating
            if rating and rating > 0 and rating <= 5:
                ratings.append(rating)
        if ratings:
            ans.rating = sum(ratings) / len(ratings)

        # Smallest language is likely to be valid
        ans.language = self.length_merge('language',
                                         results,
                                         null_value=ans.language)

        # Choose longest comments
        ans.comments = self.length_merge('comments',
                                         results,
                                         null_value=ans.comments,
                                         shortest=False)

        # Published date
        if min_year:
            for r in results:
                year = getattr(r.pubdate, 'year', None)
                if year == min_year:
                    ans.pubdate = r.pubdate
                    break
            if getattr(ans.pubdate, 'year', None) == min_year:
                min_date = datetime(min_year,
                                    ans.pubdate.month,
                                    ans.pubdate.day,
                                    tzinfo=utc_tz)
            else:
                min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
            ans.pubdate = min_date
        else:
            min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
            for r in results:
                if r.pubdate is not None:
                    candidate = as_utc(r.pubdate)
                    if candidate < min_date:
                        min_date = candidate
            if min_date.year < 3000:
                ans.pubdate = min_date

        # Identifiers
        for r in results:
            ans.identifiers.update(r.identifiers)

        # Cover URL
        ans.has_cached_cover_url = bool(
            [r for r in results if getattr(r, 'has_cached_cover_url', False)])

        # Merge any other fields with no special handling (random merge)
        touched_fields = set()
        for r in results:
            if hasattr(r, 'identify_plugin'):
                touched_fields |= r.identify_plugin.touched_fields

        for f in touched_fields:
            if f.startswith('identifier:') or not ans.is_null(f):
                continue
            setattr(ans, f,
                    self.random_merge(f, results, null_value=getattr(ans, f)))

        if do_asr:
            avg = [x.relevance_in_source for x in results]
            avg = sum(avg) / len(avg)
            ans.average_source_relevance = avg

        return ans
Beispiel #51
0
    def __init__(self,
                 parent,
                 text,
                 mi=None,
                 fm=None,
                 color_field=None,
                 icon_field_key=None,
                 icon_rule_kind=None):
        QDialog.__init__(self, parent)
        Ui_TemplateDialog.__init__(self)
        self.setupUi(self)

        self.coloring = color_field is not None
        self.iconing = icon_field_key is not None

        cols = []
        if fm is not None:
            for key in sorted(displayable_columns(fm),
                              key=lambda (k): sort_key(fm[k]['name'])
                              if k != color_row_key else 0):
                if key == color_row_key and not self.coloring:
                    continue
                from calibre.gui2.preferences.coloring import all_columns_string
                name = all_columns_string if key == color_row_key else fm[key][
                    'name']
                if name:
                    cols.append((name, key))

        self.color_layout.setVisible(False)
        self.icon_layout.setVisible(False)

        if self.coloring:
            self.color_layout.setVisible(True)
            for n1, k1 in cols:
                self.colored_field.addItem(n1, k1)
            self.colored_field.setCurrentIndex(
                self.colored_field.findData(color_field))
        elif self.iconing:
            self.icon_layout.setVisible(True)
            for n1, k1 in cols:
                self.icon_field.addItem(n1, k1)
            self.icon_file_names = []
            d = os.path.join(config_dir, 'cc_icons')
            if os.path.exists(d):
                for icon_file in os.listdir(d):
                    icon_file = icu_lower(icon_file)
                    if os.path.exists(os.path.join(d, icon_file)):
                        if icon_file.endswith('.png'):
                            self.icon_file_names.append(icon_file)
            self.icon_file_names.sort(key=sort_key)
            self.update_filename_box()

            dex = 0
            from calibre.gui2.preferences.coloring import icon_rule_kinds
            for i, tup in enumerate(icon_rule_kinds):
                txt, val = tup
                self.icon_kind.addItem(txt, userData=QVariant(val))
                if val == icon_rule_kind:
                    dex = i
            self.icon_kind.setCurrentIndex(dex)
            self.icon_field.setCurrentIndex(
                self.icon_field.findData(icon_field_key))

        if mi:
            self.mi = mi
        else:
            self.mi = Metadata(_('Title'), [_('Author')])
            self.mi.author_sort = _('Author Sort')
            self.mi.series = _('Series')
            self.mi.series_index = 3
            self.mi.rating = 4.0
            self.mi.tags = [_('Tag 1'), _('Tag 2')]
            self.mi.languages = ['eng']
            if fm is not None:
                self.mi.set_all_user_metadata(fm.custom_field_metadata())
            else:
                # No field metadata. Grab a copy from the current library so
                # that we can validate any custom column names. The values for
                # the columns will all be empty, which in some very unusual
                # cases might cause formatter errors. We can live with that.
                from calibre.gui2.ui import get_gui
                self.mi.set_all_user_metadata(get_gui(
                ).current_db.new_api.field_metadata.custom_field_metadata())

        # Remove help icon on title bar
        icon = self.windowIcon()
        self.setWindowFlags(self.windowFlags()
                            & (~Qt.WindowContextHelpButtonHint))
        self.setWindowIcon(icon)

        self.last_text = ''
        self.highlighter = TemplateHighlighter(self.textbox.document())
        self.textbox.cursorPositionChanged.connect(self.text_cursor_changed)
        self.textbox.textChanged.connect(self.textbox_changed)

        self.textbox.setTabStopWidth(10)
        self.source_code.setTabStopWidth(10)
        self.documentation.setReadOnly(True)
        self.source_code.setReadOnly(True)

        if text is not None:
            self.textbox.setPlainText(text)
        self.buttonBox.button(QDialogButtonBox.Ok).setText(_('&OK'))
        self.buttonBox.button(QDialogButtonBox.Cancel).setText(_('&Cancel'))
        self.color_copy_button.clicked.connect(self.color_to_clipboard)
        self.filename_button.clicked.connect(self.filename_button_clicked)
        self.icon_copy_button.clicked.connect(self.icon_to_clipboard)

        try:
            with open(P('template-functions.json'), 'rb') as f:
                self.builtin_source_dict = json.load(f, encoding='utf-8')
        except:
            self.builtin_source_dict = {}

        self.funcs = formatter_functions().get_functions()
        self.builtins = formatter_functions().get_builtins()

        func_names = sorted(self.funcs)
        self.function.clear()
        self.function.addItem('')
        self.function.addItems(func_names)
        self.function.setCurrentIndex(0)
        self.function.currentIndexChanged[str].connect(self.function_changed)
        self.textbox_changed()
        self.rule = (None, '')

        tt = _('Template language tutorial')
        self.template_tutorial.setText(
            '<a href="http://manual.calibre-ebook.com/template_lang.html">'
            '%s</a>' % tt)
        tt = _('Template function reference')
        self.template_func_reference.setText(
            '<a href="http://manual.calibre-ebook.com/template_ref.html">'
            '%s</a>' % tt)

        self.font_size_box.setValue(gprefs['gpm_template_editor_font_size'])
        self.font_size_box.valueChanged.connect(self.font_size_changed)
Beispiel #52
0
    def convert(self, oeb_book, output, input_plugin, opts, logger):
        """Convert from calibre's internal format to KePub."""
        common.log.debug("Running ePub conversion")
        self.epub_output_plugin.convert(
            oeb_book, output, input_plugin, opts, common.log
        )
        common.log.debug("Done ePub conversion")
        container = KEPubContainer(output, common.log, opts.kepub_clean_markup)

        if container.is_drm_encumbered:
            common.log.error("DRM-encumbered container, skipping conversion")
            return

        # Write the details file
        o = {
            "kepub_output_version": ".".join([str(n) for n in self.version]),
            "kepub_output_currenttime": datetime.utcnow().ctime(),
        }
        kte_data_file = self.temporary_file("_KePubOutputPluginInfo")
        kte_data_file.write(json.dumps(o).encode("UTF-8"))
        kte_data_file.close()
        container.copy_file_to_container(
            kte_data_file.name, name="plugininfo.kte", mt="application/json"
        )

        title = container.opf_xpath("./opf:metadata/dc:title/text()")
        if len(title) > 0:
            title = title[0]
        else:
            title = NULL_VALUES["title"]
        authors = container.opf_xpath(
            './opf:metadata/dc:creator[@opf:role="aut"]/text()'
        )
        if len(authors) < 1:
            authors = NULL_VALUES["authors"]
        mi = Metadata(title, authors)
        language = container.opf_xpath("./opf:metadata/dc:language/text()")
        if len(language) > 0:
            mi.languages = language
            language = language[0]
        else:
            mi.languages = NULL_VALUES["languages"]
            language = NULL_VALUES["language"]

        try:
            common.modify_epub(
                container,
                output,
                metadata=mi,
                opts={
                    "hyphenate": opts.kepub_hyphenate,
                    "hyphen_min_chars": opts.kepub_hyphenate_chars,
                    "hyphen_min_chars_before": opts.kepub_hyphenate_chars_before,
                    "hyphen_min_chars_after": opts.kepub_hyphenate_chars_after,
                    "hyphen_limit_lines": opts.kepub_hyphenate_limit_lines,
                    "no-hyphens": opts.kepub_disable_hyphenation,
                    "smarten_punctuation": False,
                    "extended_kepub_features": True,
                },
            )
        except Exception:
            common.log.exception("Failed converting!")
            raise
Beispiel #53
0
    def __init__(self,
                 prefix,
                 lpath,
                 title=None,
                 authors=None,
                 mime=None,
                 date=None,
                 ContentType=None,
                 thumbnail_name=None,
                 size=None,
                 other=None):
        from calibre.utils.date import parse_date
        #         debug_print('Book::__init__ - title=', title)
        show_debug = title is not None and title.lower().find("xxxxx") >= 0
        if other is not None:
            other.title = title
            other.published_date = date
        if show_debug:
            debug_print("Book::__init__ - title=", title, 'authors=', authors)
            debug_print("Book::__init__ - other=", other)
        super(Book, self).__init__(prefix, lpath, size, other)

        if title is not None and len(title) > 0:
            self.title = title

        if authors is not None and len(authors) > 0:
            self.authors_from_string(authors)
            if self.author_sort is None or self.author_sort == "Unknown":
                self.author_sort = author_to_author_sort(authors)

        self.mime = mime

        self.size = size  # will be set later if None

        if ContentType == '6' and date is not None:
            try:
                self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
            except:
                try:
                    self.datetime = time.strptime(
                        date.split('+')[0], "%Y-%m-%dT%H:%M:%S")
                except:
                    try:
                        self.datetime = time.strptime(
                            date.split('+')[0], "%Y-%m-%d")
                    except:
                        try:
                            self.datetime = parse_date(
                                date, assume_utc=True).timetuple()
                        except:
                            try:
                                self.datetime = time.gmtime(
                                    os.path.getctime(self.path))
                            except:
                                self.datetime = time.gmtime()

        self.kobo_metadata = Metadata(title, self.authors)
        self.contentID = None
        self.current_shelves = []
        self.kobo_collections = []
        self.can_put_on_shelves = True
        self.kobo_series = None
        self.kobo_series_number = None  # Kobo stores the series number as string. And it can have a leading "#".
        self.kobo_subtitle = None

        if thumbnail_name is not None:
            self.thumbnail = ImageWrapper(thumbnail_name)

        if show_debug:
            debug_print("Book::__init__ end - self=", self)
            debug_print("Book::__init__ end - title=", title, 'authors=',
                        authors)
Beispiel #54
0
    def sample_results(self):
        m1 = Metadata('The Great Gatsby', ['Francis Scott Fitzgerald'])
        m2 = Metadata(
            'The Great Gatsby - An extra long title to test resizing',
            ['F. Scott Fitzgerald'])
        m1.has_cached_cover_url = True
        m2.has_cached_cover_url = False
        m1.comments = 'Some comments ' * 10
        m1.tags = ['tag%d' % i for i in range(20)]
        m1.rating = 4.4
        m1.language = 'en'
        m2.language = 'fr'
        m1.pubdate = utcnow()
        m2.pubdate = fromordinal(1000000)
        m1.publisher = 'Publisher 1'
        m2.publisher = 'Publisher 2'

        return [m1, m2]
Beispiel #55
0
    def test_legacy_adding_books(self):  # {{{
        'Test various adding/deleting books methods'
        from calibre.ebooks.metadata.book.base import Metadata
        from calibre.ptempfile import TemporaryFile
        legacy, old = self.init_legacy(self.cloned_library), self.init_old(
            self.cloned_library)
        mi = Metadata('Added Book0', authors=('Added Author', ))
        with TemporaryFile(suffix='.aff') as name:
            with open(name, 'wb') as f:
                f.write(b'xxx')
            T = partial(ET,
                        'add_books', ([name], ['AFF'], [mi]),
                        old=old,
                        legacy=legacy)
            T()(self)
            book_id = T(kwargs={'return_ids': True})(self)[1][0]
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF', ))
            T(kwargs={'add_duplicates': False})(self)
            mi.title = 'Added Book1'
            mi.uuid = 'uuu'
            T = partial(ET,
                        'import_book', (mi, [name]),
                        old=old,
                        legacy=legacy)
            book_id = T()(self)
            self.assertNotEqual(legacy.uuid(book_id, index_is_id=True),
                                old.uuid(book_id, index_is_id=True))
            book_id = T(kwargs={'preserve_uuid': True})(self)
            self.assertEqual(legacy.uuid(book_id, index_is_id=True),
                             old.uuid(book_id, index_is_id=True))
            self.assertEqual(legacy.new_api.formats(book_id), ('AFF', ))

            T = partial(ET, 'add_format', old=old, legacy=legacy)
            T((0, 'AFF', BytesIO(b'fffff')))(self)
            T((0, 'AFF', BytesIO(b'fffff')))(self)
            T((0, 'AFF', BytesIO(b'fffff')), {'replace': True})(self)
        with TemporaryFile(suffix='.opf') as name:
            with open(name, 'wb') as f:
                f.write(b'zzzz')
            T = partial(ET,
                        'import_book', (mi, [name]),
                        old=old,
                        legacy=legacy)
            book_id = T()(self)
            self.assertFalse(legacy.new_api.formats(book_id))

        mi.title = 'Added Book2'
        T = partial(ET, 'create_book_entry', (mi, ), old=old, legacy=legacy)
        T()
        T({'add_duplicates': False})
        T({'force_id': 1000})

        with TemporaryFile(suffix='.txt') as name:
            with open(name, 'wb') as f:
                f.write(b'tttttt')
            bid = legacy.add_catalog(name, 'My Catalog')
            self.assertEqual(old.add_catalog(name, 'My Catalog'), bid)
            cache = legacy.new_api
            self.assertEqual(cache.formats(bid), ('TXT', ))
            self.assertEqual(cache.field_for('title', bid), 'My Catalog')
            self.assertEqual(cache.field_for('authors', bid), ('calibre', ))
            self.assertEqual(cache.field_for('tags', bid), (_('Catalog'), ))
            self.assertTrue(bid < legacy.add_catalog(name, 'Something else'))
            self.assertEqual(legacy.add_catalog(name, 'My Catalog'), bid)
            self.assertEqual(old.add_catalog(name, 'My Catalog'), bid)

            bid = legacy.add_news(
                name, {
                    'title': 'Events',
                    'add_title_tag': True,
                    'custom_tags': ('one', 'two')
                })
            self.assertEqual(cache.formats(bid), ('TXT', ))
            self.assertEqual(cache.field_for('authors', bid), ('calibre', ))
            self.assertEqual(cache.field_for('tags', bid),
                             (_('News'), 'Events', 'one', 'two'))

        self.assertTrue(legacy.cover(1, index_is_id=True))
        origcov = legacy.cover(1, index_is_id=True)
        self.assertTrue(legacy.has_cover(1))
        legacy.remove_cover(1)
        self.assertFalse(legacy.has_cover(1))
        self.assertFalse(legacy.cover(1, index_is_id=True))
        legacy.set_cover(3, origcov)
        self.assertEqual(legacy.cover(3, index_is_id=True), origcov)
        self.assertTrue(legacy.has_cover(3))

        self.assertTrue(legacy.format(1, 'FMT1', index_is_id=True))
        legacy.remove_format(1, 'FMT1', index_is_id=True)
        self.assertIsNone(legacy.format(1, 'FMT1', index_is_id=True))

        legacy.delete_book(1)
        old.delete_book(1)
        self.assertNotIn(1, legacy.all_ids())
        legacy.dump_metadata((2, 3))
        old.close()
Beispiel #56
0
class TemplateDialog(QDialog, Ui_TemplateDialog):
    def __init__(self,
                 parent,
                 text,
                 mi=None,
                 fm=None,
                 color_field=None,
                 icon_field_key=None,
                 icon_rule_kind=None):
        QDialog.__init__(self, parent)
        Ui_TemplateDialog.__init__(self)
        self.setupUi(self)

        self.coloring = color_field is not None
        self.iconing = icon_field_key is not None

        cols = []
        if fm is not None:
            for key in sorted(displayable_columns(fm),
                              key=lambda (k): sort_key(fm[k]['name'])
                              if k != color_row_key else 0):
                if key == color_row_key and not self.coloring:
                    continue
                from calibre.gui2.preferences.coloring import all_columns_string
                name = all_columns_string if key == color_row_key else fm[key][
                    'name']
                if name:
                    cols.append((name, key))

        self.color_layout.setVisible(False)
        self.icon_layout.setVisible(False)

        if self.coloring:
            self.color_layout.setVisible(True)
            for n1, k1 in cols:
                self.colored_field.addItem(n1, k1)
            self.colored_field.setCurrentIndex(
                self.colored_field.findData(color_field))
        elif self.iconing:
            self.icon_layout.setVisible(True)
            for n1, k1 in cols:
                self.icon_field.addItem(n1, k1)
            self.icon_file_names = []
            d = os.path.join(config_dir, 'cc_icons')
            if os.path.exists(d):
                for icon_file in os.listdir(d):
                    icon_file = icu_lower(icon_file)
                    if os.path.exists(os.path.join(d, icon_file)):
                        if icon_file.endswith('.png'):
                            self.icon_file_names.append(icon_file)
            self.icon_file_names.sort(key=sort_key)
            self.update_filename_box()

            dex = 0
            from calibre.gui2.preferences.coloring import icon_rule_kinds
            for i, tup in enumerate(icon_rule_kinds):
                txt, val = tup
                self.icon_kind.addItem(txt, userData=QVariant(val))
                if val == icon_rule_kind:
                    dex = i
            self.icon_kind.setCurrentIndex(dex)
            self.icon_field.setCurrentIndex(
                self.icon_field.findData(icon_field_key))

        if mi:
            self.mi = mi
        else:
            self.mi = Metadata(_('Title'), [_('Author')])
            self.mi.author_sort = _('Author Sort')
            self.mi.series = _('Series')
            self.mi.series_index = 3
            self.mi.rating = 4.0
            self.mi.tags = [_('Tag 1'), _('Tag 2')]
            self.mi.languages = ['eng']
            if fm is not None:
                self.mi.set_all_user_metadata(fm.custom_field_metadata())
            else:
                # No field metadata. Grab a copy from the current library so
                # that we can validate any custom column names. The values for
                # the columns will all be empty, which in some very unusual
                # cases might cause formatter errors. We can live with that.
                from calibre.gui2.ui import get_gui
                self.mi.set_all_user_metadata(get_gui(
                ).current_db.new_api.field_metadata.custom_field_metadata())

        # Remove help icon on title bar
        icon = self.windowIcon()
        self.setWindowFlags(self.windowFlags()
                            & (~Qt.WindowContextHelpButtonHint))
        self.setWindowIcon(icon)

        self.last_text = ''
        self.highlighter = TemplateHighlighter(self.textbox.document())
        self.textbox.cursorPositionChanged.connect(self.text_cursor_changed)
        self.textbox.textChanged.connect(self.textbox_changed)

        self.textbox.setTabStopWidth(10)
        self.source_code.setTabStopWidth(10)
        self.documentation.setReadOnly(True)
        self.source_code.setReadOnly(True)

        if text is not None:
            self.textbox.setPlainText(text)
        self.buttonBox.button(QDialogButtonBox.Ok).setText(_('&OK'))
        self.buttonBox.button(QDialogButtonBox.Cancel).setText(_('&Cancel'))
        self.color_copy_button.clicked.connect(self.color_to_clipboard)
        self.filename_button.clicked.connect(self.filename_button_clicked)
        self.icon_copy_button.clicked.connect(self.icon_to_clipboard)

        try:
            with open(P('template-functions.json'), 'rb') as f:
                self.builtin_source_dict = json.load(f, encoding='utf-8')
        except:
            self.builtin_source_dict = {}

        self.funcs = formatter_functions().get_functions()
        self.builtins = formatter_functions().get_builtins()

        func_names = sorted(self.funcs)
        self.function.clear()
        self.function.addItem('')
        self.function.addItems(func_names)
        self.function.setCurrentIndex(0)
        self.function.currentIndexChanged[str].connect(self.function_changed)
        self.textbox_changed()
        self.rule = (None, '')

        tt = _('Template language tutorial')
        self.template_tutorial.setText(
            '<a href="http://manual.calibre-ebook.com/template_lang.html">'
            '%s</a>' % tt)
        tt = _('Template function reference')
        self.template_func_reference.setText(
            '<a href="http://manual.calibre-ebook.com/template_ref.html">'
            '%s</a>' % tt)

        self.font_size_box.setValue(gprefs['gpm_template_editor_font_size'])
        self.font_size_box.valueChanged.connect(self.font_size_changed)

    def font_size_changed(self, toWhat):
        gprefs['gpm_template_editor_font_size'] = toWhat
        self.highlighter.initializeFormats()
        self.highlighter.rehighlight()

    def filename_button_clicked(self):
        try:
            path = choose_files(self,
                                'choose_category_icon',
                                _('Select Icon'),
                                filters=[('Images',
                                          ['png', 'gif', 'jpg', 'jpeg'])],
                                all_files=False,
                                select_only_single_file=True)
            if path:
                icon_path = path[0]
                icon_name = sanitize_file_name_unicode(
                    os.path.splitext(os.path.basename(icon_path))[0] + '.png')
                if icon_name not in self.icon_file_names:
                    self.icon_file_names.append(icon_name)
                    self.update_filename_box()
                    try:
                        p = QIcon(icon_path).pixmap(QSize(128, 128))
                        d = os.path.join(config_dir, 'cc_icons')
                        if not os.path.exists(os.path.join(d, icon_name)):
                            if not os.path.exists(d):
                                os.makedirs(d)
                            with open(os.path.join(d, icon_name), 'wb') as f:
                                f.write(pixmap_to_data(p, format='PNG'))
                    except:
                        traceback.print_exc()
                self.icon_files.setCurrentIndex(
                    self.icon_files.findText(icon_name))
                self.icon_files.adjustSize()
        except:
            traceback.print_exc()
        return

    def update_filename_box(self):
        self.icon_files.clear()
        self.icon_file_names.sort(key=sort_key)
        self.icon_files.addItem('')
        self.icon_files.addItems(self.icon_file_names)
        for i, filename in enumerate(self.icon_file_names):
            icon = QIcon(os.path.join(config_dir, 'cc_icons', filename))
            self.icon_files.setItemIcon(i + 1, icon)

    def color_to_clipboard(self):
        app = QApplication.instance()
        c = app.clipboard()
        c.setText(unicode(self.color_name.color))

    def icon_to_clipboard(self):
        app = QApplication.instance()
        c = app.clipboard()
        c.setText(unicode(self.icon_files.currentText()))

    def textbox_changed(self):
        cur_text = unicode(self.textbox.toPlainText())
        if self.last_text != cur_text:
            self.last_text = cur_text
            self.highlighter.regenerate_paren_positions()
            self.text_cursor_changed()
            self.template_value.setText(SafeFormat().safe_format(
                cur_text, self.mi, _('EXCEPTION: '), self.mi))

    def text_cursor_changed(self):
        cursor = self.textbox.textCursor()
        position = cursor.position()
        t = unicode(self.textbox.toPlainText())
        if position > 0 and position <= len(t):
            block_number = cursor.blockNumber()
            pos_in_block = cursor.positionInBlock() - 1
            self.highlighter.check_cursor_pos(t[position - 1], block_number,
                                              pos_in_block)

    def function_changed(self, toWhat):
        name = unicode(toWhat)
        self.source_code.clear()
        self.documentation.clear()
        if name in self.funcs:
            self.documentation.setPlainText(self.funcs[name].doc)
            if name in self.builtins and name in self.builtin_source_dict:
                self.source_code.setPlainText(self.builtin_source_dict[name])
            else:
                self.source_code.setPlainText(self.funcs[name].program_text)

    def accept(self):
        txt = unicode(self.textbox.toPlainText()).rstrip()
        if self.coloring:
            if self.colored_field.currentIndex() == -1:
                error_dialog(self,
                             _('No column chosen'),
                             _('You must specify a column to be colored'),
                             show=True)
                return
            if not txt:
                error_dialog(self,
                             _('No template provided'),
                             _('The template box cannot be empty'),
                             show=True)
                return

            self.rule = (unicode(
                self.colored_field.itemData(
                    self.colored_field.currentIndex()).toString()), txt)
        elif self.iconing:
            rt = unicode(
                self.icon_kind.itemData(
                    self.icon_kind.currentIndex()).toString())
            self.rule = (rt,
                         unicode(
                             self.icon_field.itemData(
                                 self.icon_field.currentIndex()).toString()),
                         txt)
        else:
            self.rule = ('', txt)
        QDialog.accept(self)
Beispiel #57
0
    def test_legacy_setters(self):  # {{{
        'Test methods that are directly equivalent in the old and new interface'
        from calibre.ebooks.metadata.book.base import Metadata
        from calibre.utils.date import now
        n = now()
        ndb = self.init_legacy(self.cloned_library)
        amap = ndb.new_api.get_id_map('authors')
        sorts = [(aid, 's%d' % aid) for aid in amap]
        db = self.init_old(self.cloned_library)
        run_funcs(self, db, ndb, (
            ('+format_metadata', 1, 'FMT1', itemgetter('size')),
            ('+format_metadata', 1, 'FMT2', itemgetter('size')),
            ('+format_metadata', 2, 'FMT1', itemgetter('size')),
            ('get_tags', 0),
            ('get_tags', 1),
            ('get_tags', 2),
            ('is_tag_used', 'News'),
            ('is_tag_used', 'xchkjgfh'),
            ('bulk_modify_tags', (1, ), ['t1'], ['News']),
            ('bulk_modify_tags', (2, ), ['t1'], ['Tag One', 'Tag Two']),
            ('bulk_modify_tags', (3, ), ['t1', 't2', 't3']),
            (db.clean, ),
            ('@all_tags', ),
            ('@tags', 0),
            ('@tags', 1),
            ('@tags', 2),
            ('unapply_tags', 1, ['t1']),
            ('unapply_tags', 2, ['xxxx']),
            ('unapply_tags', 3, ['t2', 't3']),
            (db.clean, ),
            ('@all_tags', ),
            ('@tags', 0),
            ('@tags', 1),
            ('@tags', 2),
            ('update_last_modified', (1, ), True, n),
            ('update_last_modified', (3, ), True, n),
            ('metadata_last_modified', 1, True),
            ('metadata_last_modified', 3, True),
            ('set_sort_field_for_author', sorts[0][0], sorts[0][1]),
            ('set_sort_field_for_author', sorts[1][0], sorts[1][1]),
            ('set_sort_field_for_author', sorts[2][0], sorts[2][1]),
            ('set_link_field_for_author', sorts[0][0], sorts[0][1]),
            ('set_link_field_for_author', sorts[1][0], sorts[1][1]),
            ('set_link_field_for_author', sorts[2][0], sorts[2][1]),
            (db.refresh, ),
            ('author_sort', 0),
            ('author_sort', 1),
            ('author_sort', 2),
        ))
        omi = [db.get_metadata(x) for x in (0, 1, 2)]
        nmi = [ndb.get_metadata(x) for x in (0, 1, 2)]
        self.assertEqual([x.author_sort_map for x in omi],
                         [x.author_sort_map for x in nmi])
        self.assertEqual([x.author_link_map for x in omi],
                         [x.author_link_map for x in nmi])
        db.close()

        ndb = self.init_legacy(self.cloned_library)
        db = self.init_old(self.cloned_library)

        run_funcs(self, db, ndb, (
            (
                'set_authors',
                1,
                ('author one', ),
            ),
            ('set_authors', 2, ('author two', ), True, True, True),
            ('set_author_sort', 3, 'new_aus'),
            ('set_comment', 1, ''),
            ('set_comment', 2, None),
            ('set_comment', 3, '<p>a comment</p>'),
            ('set_has_cover', 1, True),
            ('set_has_cover', 2, True),
            ('set_has_cover', 3, 1),
            ('set_identifiers', 2, {
                'test': '',
                'a': 'b'
            }),
            ('set_identifiers', 3, {
                'id': '1',
                'isbn': '9783161484100'
            }),
            ('set_identifiers', 1, {}),
            ('set_languages', 1, ('en', )),
            ('set_languages', 2, ()),
            ('set_languages', 3, ('deu', 'spa', 'fra')),
            ('set_pubdate', 1, None),
            ('set_pubdate', 2, '2011-1-7'),
            ('set_series', 1, 'a series one'),
            ('set_series', 2, 'another series [7]'),
            ('set_series', 3, 'a third series'),
            ('set_publisher', 1, 'publisher two'),
            ('set_publisher', 2, None),
            ('set_publisher', 3, 'a third puB'),
            ('set_rating', 1, 2.3),
            ('set_rating', 2, 0),
            ('set_rating', 3, 8),
            ('set_timestamp', 1, None),
            ('set_timestamp', 2, '2011-1-7'),
            ('set_uuid', 1, None),
            ('set_uuid', 2, 'a test uuid'),
            ('set_title', 1, 'title two'),
            ('set_title', 2, None),
            ('set_title', 3, 'The Test Title'),
            ('set_tags', 1, ['a1', 'a2'], True),
            ('set_tags', 2, ['b1', 'tag one'], False, False, False, True),
            ('set_tags', 3, ['A1']),
            (db.refresh, ),
            ('title', 0),
            ('title', 1),
            ('title', 2),
            ('title_sort', 0),
            ('title_sort', 1),
            ('title_sort', 2),
            ('authors', 0),
            ('authors', 1),
            ('authors', 2),
            ('author_sort', 0),
            ('author_sort', 1),
            ('author_sort', 2),
            ('has_cover', 3),
            ('has_cover', 1),
            ('has_cover', 2),
            ('get_identifiers', 0),
            ('get_identifiers', 1),
            ('get_identifiers', 2),
            ('pubdate', 0),
            ('pubdate', 1),
            ('pubdate', 2),
            ('timestamp', 0),
            ('timestamp', 1),
            ('timestamp', 2),
            ('publisher', 0),
            ('publisher', 1),
            ('publisher', 2),
            ('rating', 0),
            ('+rating', 1, lambda x: x or 0),
            ('rating', 2),
            ('series', 0),
            ('series', 1),
            ('series', 2),
            ('series_index', 0),
            ('series_index', 1),
            ('series_index', 2),
            ('uuid', 0),
            ('uuid', 1),
            ('uuid', 2),
            ('isbn', 0),
            ('isbn', 1),
            ('isbn', 2),
            ('@tags', 0),
            ('@tags', 1),
            ('@tags', 2),
            ('@all_tags', ),
            ('@get_all_identifier_types', ),
            ('set_title_sort', 1, 'Title Two'),
            ('set_title_sort', 2, None),
            ('set_title_sort', 3, 'The Test Title_sort'),
            ('set_series_index', 1, 2.3),
            ('set_series_index', 2, 0),
            ('set_series_index', 3, 8),
            ('set_identifier', 1, 'moose', 'val'),
            ('set_identifier', 2, 'test', ''),
            ('set_identifier', 3, '', ''),
            (db.refresh, ),
            ('series_index', 0),
            ('series_index', 1),
            ('series_index', 2),
            ('title_sort', 0),
            ('title_sort', 1),
            ('title_sort', 2),
            ('get_identifiers', 0),
            ('get_identifiers', 1),
            ('get_identifiers', 2),
            ('@get_all_identifier_types', ),
            ('set_metadata', 1, Metadata(
                'title', ('a1', )), False, False, False, True, True),
            ('set_metadata', 3, Metadata('title', ('a1', ))),
            (db.refresh, ),
            ('title', 0),
            ('title', 1),
            ('title', 2),
            ('title_sort', 0),
            ('title_sort', 1),
            ('title_sort', 2),
            ('authors', 0),
            ('authors', 1),
            ('authors', 2),
            ('author_sort', 0),
            ('author_sort', 1),
            ('author_sort', 2),
            ('@tags', 0),
            ('@tags', 1),
            ('@tags', 2),
            ('@all_tags', ),
            ('@get_all_identifier_types', ),
        ))
        db.close()

        ndb = self.init_legacy(self.cloned_library)
        db = self.init_old(self.cloned_library)

        run_funcs(self, db, ndb, (
            ('set', 0, 'title', 'newtitle'),
            ('set', 0, 'tags', 't1,t2,tag one', True),
            ('set', 0, 'authors', 'author one & Author Two', True),
            ('set', 0, 'rating', 3.2),
            ('set', 0, 'publisher', 'publisher one', False),
            (db.refresh, ),
            ('title', 0),
            ('rating', 0),
            ('#tags', 0),
            ('#tags', 1),
            ('#tags', 2),
            ('authors', 0),
            ('authors', 1),
            ('authors', 2),
            ('publisher', 0),
            ('publisher', 1),
            ('publisher', 2),
            ('delete_tag', 'T1'),
            ('delete_tag', 'T2'),
            ('delete_tag', 'Tag one'),
            ('delete_tag', 'News'),
            (db.clean, ),
            (db.refresh, ),
            ('@all_tags', ),
            ('#tags', 0),
            ('#tags', 1),
            ('#tags', 2),
        ))
        db.close()

        ndb = self.init_legacy(self.cloned_library)
        db = self.init_old(self.cloned_library)
        run_funcs(self, db, ndb, (
            ('remove_all_tags', (1, 2, 3)),
            (db.clean, ),
            ('@all_tags', ),
            ('@tags', 0),
            ('@tags', 1),
            ('@tags', 2),
        ))
        db.close()

        ndb = self.init_legacy(self.cloned_library)
        db = self.init_old(self.cloned_library)
        a = {v: k
             for k, v in ndb.new_api.get_id_map('authors').iteritems()
             }['Author One']
        t = {v: k
             for k, v in ndb.new_api.get_id_map('tags').iteritems()}['Tag One']
        s = {v: k
             for k, v in ndb.new_api.get_id_map('series').iteritems()
             }['A Series One']
        p = {v: k
             for k, v in ndb.new_api.get_id_map('publisher').iteritems()
             }['Publisher One']
        run_funcs(self, db, ndb, (
            ('rename_author', a, 'Author Two'),
            ('rename_tag', t, 'News'),
            ('rename_series', s, 'ss'),
            ('rename_publisher', p, 'publisher one'),
            (db.clean, ),
            (db.refresh, ),
            ('@all_tags', ),
            ('tags', 0),
            ('tags', 1),
            ('tags', 2),
            ('series', 0),
            ('series', 1),
            ('series', 2),
            ('publisher', 0),
            ('publisher', 1),
            ('publisher', 2),
            ('series_index', 0),
            ('series_index', 1),
            ('series_index', 2),
            ('authors', 0),
            ('authors', 1),
            ('authors', 2),
            ('author_sort', 0),
            ('author_sort', 1),
            ('author_sort', 2),
        ))
        db.close()
    def parse_details(self, root):
        try:
            kyobobook_id = self.parse_kyobobook_id(self.url)
        except:
            self.log.exception('Error parsing Kyobobook id for url: %r' %
                               self.url)
            kyobobook_id = None

        try:
            (title, series, series_index) = self.parse_title_series(root)
        except:
            self.log.exception('Error parsing title and series for url: %r' %
                               self.url)
            title = series = series_index = None

        try:
            authors = self.parse_authors(root)
        except:
            self.log.exception('Error parsing authors for url: %r' % self.url)
            authors = []

        if not title or not authors or not kyobobook_id:
            self.log.error('Could not find title/authors/kyobobook id for %r' %
                           self.url)
            self.log.error('Kyobobook: %r Title: %r Authors: %r' %
                           (kyobobook_id, title, authors))
            return

        mi = Metadata(title, authors)
        if series:
            mi.series = series
            mi.series_index = series_index
        mi.set_identifier('kyobobook', kyobobook_id)
        self.kyobobook_id = kyobobook_id

        try:
            isbn = self.parse_isbn(root)
            if isbn:
                self.isbn = mi.isbn = isbn
        except:
            self.log.exception('Error parsing ISBN for url: %r' % self.url)

        try:
            mi.rating = self.parse_rating(root)
        except:
            self.log.exception('Error parsing ratings for url: %r' % self.url)

        try:
            mi.comments = self.parse_comments(root)
        except:
            self.log.exception('Error parsing comments for url: %r' % self.url)

        try:
            self.cover_url = self.parse_cover(root)
        except:
            self.log.exception('Error parsing cover for url: %r' % self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            tags = self.parse_tags(root)
            if tags:
                mi.tags = tags
        except:
            self.log.exception('Error parsing tags for url: %r' % self.url)

        try:
            mi.publisher, mi.pubdate = self.parse_publisher_and_date(root)
        except:
            self.log.exception('Error parsing publisher and date for url: %r' %
                               self.url)

        try:
            lang = self._parse_language(root)
            if lang:
                mi.language = lang
        except:
            self.log.exception('Error parsing language for url: %r' % self.url)

        mi.source_relevance = self.relevance

        if self.kyobobook_id:
            if self.isbn:
                self.plugin.cache_isbn_to_identifier(self.isbn,
                                                     self.kyobobook_id)
            if self.cover_url:
                self.plugin.cache_identifier_to_cover_url(
                    self.kyobobook_id, self.cover_url)

        self.plugin.clean_downloaded_metadata(mi)

        self.result_queue.put(mi)
Beispiel #59
0
    def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
        from lxml import etree

        def tostring(x):
            if x is None:
                return ''
            return etree.tostring(x, method='text', encoding=unicode).strip()

        orig_isbn = identifiers.get('isbn', None)
        title_tokens = list(self.get_title_tokens(orig_title))
        author_tokens = list(self.get_author_tokens(orig_authors))
        results = []

        def ismatch(title, authors):
            authors = lower(' '.join(authors))
            title = lower(title)
            match = not title_tokens
            for t in title_tokens:
                if lower(t) in title:
                    match = True
                    break
            amatch = not author_tokens
            for a in author_tokens:
                if lower(a) in authors:
                    amatch = True
                    break
            if not author_tokens: amatch = True
            return match and amatch

        bl = feed.find('BookList')
        if bl is None:
            err = tostring(feed.find('errormessage'))
            raise ValueError('ISBNDb query failed:' + err)
        total_results = int(bl.get('total_results'))
        shown_results = int(bl.get('shown_results'))
        for bd in bl.xpath('.//BookData'):
            isbn = check_isbn(bd.get('isbn', None))
            isbn13 = check_isbn(bd.get('isbn13', None))
            if not isbn and not isbn13:
                continue
            if orig_isbn and orig_isbn not in {isbn, isbn13}:
                continue
            title = tostring(bd.find('Title'))
            if not title:
                continue
            authors = []
            for au in bd.xpath('.//Authors/Person'):
                au = tostring(au)
                if au:
                    if ',' in au:
                        ln, _, fn = au.partition(',')
                        au = fn.strip() + ' ' + ln.strip()
                authors.append(au)
            if not authors:
                continue
            comments = tostring(bd.find('Summary'))
            id_ = (title, tuple(authors))
            if id_ in seen:
                continue
            seen.add(id_)
            if not ismatch(title, authors):
                continue
            publisher = tostring(bd.find('PublisherText'))
            if not publisher: publisher = None
            if publisher and 'audio' in publisher.lower():
                continue
            mi = Metadata(title, authors)
            mi.isbn = isbn
            mi.publisher = publisher
            mi.comments = comments
            results.append(mi)
        return total_results, shown_results, results
Beispiel #60
0
 def start(self, title, authors, identifiers):
     book = Metadata(title, authors)
     book.identifiers = identifiers
     self.covers_widget.start(book, self.current_cover, title, authors, {})
     return self.exec_()