Example #1
def initialize(self):
    self.beginResetModel()
    self.plugins = list(all_metadata_plugins())
    self.plugins.sort(key=attrgetter('name'))
    self.enabled_overrides = {}
    self.cover_overrides = {}
    self.endResetModel()
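
This initialize belongs to a Qt item model: wrapping the repopulation in beginResetModel()/endResetModel() tells attached views to discard their cached indexes and repaint once the reset ends. A minimal self-contained sketch of the same pattern, assuming PyQt5 rather than calibre's Qt wrappers and a stand-in plugin list in place of all_metadata_plugins():

from operator import attrgetter

from PyQt5.QtCore import QAbstractListModel, QModelIndex, Qt


class PluginModel(QAbstractListModel):
    def __init__(self, plugins, parent=None):
        super().__init__(parent)
        self.plugins = []
        self.reload(plugins)

    def reload(self, plugins):
        # Changes made between beginResetModel() and endResetModel() are
        # invisible to views; they rebuild their state once, at the end.
        self.beginResetModel()
        self.plugins = sorted(plugins, key=attrgetter('name'))
        self.endResetModel()

    def rowCount(self, parent=QModelIndex()):
        return len(self.plugins)

    def data(self, index, role=Qt.DisplayRole):
        if role == Qt.DisplayRole:
            return self.plugins[index.row()].name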
Example #2
def option_parser():
    parser = OptionParser(_('''\
%prog [options]

Fetch book metadata from online sources. You must specify at least one
of title, authors or ISBN.
'''
    ))
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--authors', help=_('Book author(s)'))
    parser.add_option('-i', '--isbn', help=_('Book ISBN'))
    parser.add_option('-I', '--identifier', action='append', default=[], help=_(
        'Identifiers such as ASIN/goodreads id etc. Can be specified multiple times for multiple identifiers.'
        ' For example: ') + '--identifier asin:B0082BAJA0')
    parser.add_option('-v', '--verbose', default=False, action='store_true',
                      help=_('Print the log to the console (stderr)'))
    parser.add_option('-o', '--opf', help=_('Output the metadata in OPF format instead of human readable text.'), action='store_true', default=False)
    parser.add_option('-c', '--cover',
            help=_('Specify a filename. The cover, if available, will be saved to it. Without this option, no cover will be downloaded.'))
    parser.add_option('-d', '--timeout', default='30',
            help=_('Timeout in seconds. Default is 30'))
    parser.add_option('-p', '--allowed-plugin', action='append', default=[],
            help=_('Specify the name of a metadata download plugin to use.'
                   ' By default, all metadata plugins will be used.'
                   ' Can be specified multiple times for multiple plugins.'
                   ' All plugin names: {}').format(', '.join(p.name for p in all_metadata_plugins())))

    return parser
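
A hypothetical invocation, assuming calibre's OptionParser keeps the standard optparse.OptionParser behaviour it subclasses (option names map to attributes, dashes become underscores):

parser = option_parser()
opts, args = parser.parse_args(['-t', 'Dune', '-a', 'Frank Herbert', '-d', '60'])
print(opts.title)           # 'Dune'
print(opts.timeout)         # '60' -- stays a string; the caller converts it
print(opts.allowed_plugin)  # [] unless -p/--allowed-plugin was given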
Example #3
def urls_from_identifiers(identifiers): # {{{
    identifiers = dict([(k.lower(), v) for k, v in identifiers.iteritems()])
    ans = []
    for plugin in all_metadata_plugins():
        try:
            id_type, id_val, url = plugin.get_book_url(identifiers)
            ans.append((plugin.get_book_url_name(id_type, id_val, url), id_type, id_val, url))
        except:
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        ans.append((isbn, 'isbn', isbn,
            'http://www.worldcat.org/isbn/'+isbn))
    doi = identifiers.get('doi', None)
    if doi:
        ans.append(('DOI', 'doi', doi,
            'http://dx.doi.org/'+doi))
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        ans.append(('arXiv', 'arxiv', arxiv,
            'http://arxiv.org/abs/'+arxiv))
    oclc = identifiers.get('oclc', None)
    if oclc:
        ans.append(('OCLC', 'oclc', oclc,
            'http://www.worldcat.org/oclc/'+oclc))
    url = identifiers.get('uri', None)
    if url is None:
        url = identifiers.get('url', None)
    if url and url.startswith('http'):
        url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
        parts = urlparse(url)
        name = parts.netloc
        ans.append((name, 'url', url, url))
    return ans
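
Each entry of the returned list is a (display name, identifier type, identifier value, URL) 4-tuple. A hypothetical call, assuming no installed plugin contributes a URL for these identifiers, so only the built-in fallbacks fire:

urls_from_identifiers({'isbn': '9780316769488', 'doi': '10.1000/182'})
# -> [('9780316769488', 'isbn', '9780316769488',
#      'http://www.worldcat.org/isbn/9780316769488'),
#     ('DOI', 'doi', '10.1000/182', 'http://dx.doi.org/10.1000/182')]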
Example #4
def no_tags_from_other_plugins():
    from calibre.customize.ui import all_metadata_plugins
    for plugin in all_metadata_plugins():
        if plugin.name == GoodreadsMoreTags.name:
            continue

        if 'ignore_fields' not in plugin.prefs:
            plugin.prefs['ignore_fields'] = []
        plugin.prefs['ignore_fields'].append('tags')
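
Note that each call appends another 'tags' entry, and mutating the list returned by a dict-backed prefs object in place may bypass its persistence hook. A hypothetical idempotent variant that reassigns instead:

def no_tags_from_other_plugins_once():
    from calibre.customize.ui import all_metadata_plugins
    for plugin in all_metadata_plugins():
        if plugin.name == GoodreadsMoreTags.name:
            continue
        ignored = list(plugin.prefs.get('ignore_fields', []))
        if 'tags' not in ignored:
            ignored.append('tags')
        # Reassigning lets prefs implementations that save on
        # __setitem__ notice the change.
        plugin.prefs['ignore_fields'] = ignored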
Example #5
def initialize(self):
    fields = set()
    for p in all_metadata_plugins():
        fields |= p.touched_fields
    self.fields = []
    for x in fields:
        if not x.startswith('identifier:') and x not in self.exclude:
            self.fields.append(x)
    self.fields.sort(key=lambda x: self.descs.get(x, x))
    self.reset()
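
The touched_fields sets mix plain field names with namespaced identifier entries such as 'identifier:isbn', which is why the filter drops that prefix. The same filtering with stand-in data (plain alphabetical sort here, whereas the model sorts by its descs labels):

fields = {'title', 'authors', 'identifier:isbn', 'comments', 'rating'}
exclude = {'rating'}
shown = sorted(f for f in fields
               if not f.startswith('identifier:') and f not in exclude)
# -> ['authors', 'comments', 'title']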
Example #6
def urls_from_identifiers(identifiers):  # {{{
    identifiers = {k.lower(): v for k, v in identifiers.iteritems()}
    ans = []
    rules = msprefs['id_link_rules']
    if rules:
        formatter = EvalFormatter()
        for k, val in identifiers.iteritems():
            vals = {
                'id':
                quote(val if isinstance(val, bytes) else val.encode('utf-8')).
                decode('ascii')
            }
            items = rules.get(k) or ()
            for name, template in items:
                try:
                    url = formatter.safe_format(template, vals, '', vals)
                except Exception:
                    import traceback
                    traceback.format_exc()
                    continue
                ans.append((name, k, val, url))
    for plugin in all_metadata_plugins():
        try:
            for id_type, id_val, url in plugin.get_book_urls(identifiers):
                ans.append(
                    (plugin.get_book_url_name(id_type, id_val,
                                              url), id_type, id_val, url))
        except:
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        ans.append(
            (isbn, 'isbn', isbn, 'http://www.worldcat.org/isbn/' + isbn))
    doi = identifiers.get('doi', None)
    if doi:
        ans.append(('DOI', 'doi', doi, 'http://dx.doi.org/' + doi))
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        ans.append(('arXiv', 'arxiv', arxiv, 'http://arxiv.org/abs/' + arxiv))
    oclc = identifiers.get('oclc', None)
    if oclc:
        ans.append(
            ('OCLC', 'oclc', oclc, 'http://www.worldcat.org/oclc/' + oclc))
    issn = check_issn(identifiers.get('issn', None))
    if issn:
        ans.append(
            (issn, 'issn', issn, 'http://www.worldcat.org/issn/' + issn))
    for k, url in identifiers.iteritems():
        if url and re.match(r'ur[il]\d*$', k) is not None:
            url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
            if url.partition(':')[0].lower() in {'http', 'file', 'https'}:
                parts = urlparse(url)
                name = parts.netloc or parts.path
                ans.append((name, k, url, url))
    return ans
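
The id_link_rules branch lets users attach extra links per identifier type: each rule is a (name, template) pair, and EvalFormatter fills the template with the URL-quoted value bound to {id}. A hypothetical rule:

msprefs['id_link_rules'] = {
    'goodreads': [
        ('Goodreads', 'https://www.goodreads.com/book/show/{id}'),
    ],
}
# urls_from_identifiers({'goodreads': '12345'}) would then include
# ('Goodreads', 'goodreads', '12345',
#  'https://www.goodreads.com/book/show/12345') in its results.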
Example #7
def urls_from_identifiers(identifiers):  # {{{
    identifiers = {k.lower(): v for k, v in identifiers.iteritems()}
    ans = []
    for plugin in all_metadata_plugins():
        try:
            for id_type, id_val, url in plugin.get_book_urls(identifiers):
                ans.append(
                    (plugin.get_book_url_name(id_type, id_val,
                                              url), id_type, id_val, url))
        except:
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        ans.append(
            (isbn, 'isbn', isbn, 'http://www.worldcat.org/isbn/' + isbn))
    doi = identifiers.get('doi', None)
    if doi:
        ans.append(('DOI', 'doi', doi, 'http://dx.doi.org/' + doi))
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        ans.append(('arXiv', 'arxiv', arxiv, 'http://arxiv.org/abs/' + arxiv))
    oclc = identifiers.get('oclc', None)
    if oclc:
        ans.append(
            ('OCLC', 'oclc', oclc, 'http://www.worldcat.org/oclc/' + oclc))
    issn = check_issn(identifiers.get('issn', None))
    if issn:
        ans.append(
            (issn, 'issn', issn, 'http://www.worldcat.org/issn/' + issn))
    for k, url in identifiers.iteritems():
        if url and re.match(r'ur[il]\d*$', k) is not None and url.partition(
                ':')[0].lower() in {'http', 'file', 'https'}:
            url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
            parts = urlparse(url)
            name = parts.netloc or parts.path
            ans.append((name, k, url, url))
    return ans
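
The url[:8] slice is just wide enough to cover the scheme separator of a stored value in which ':' and ',' were escaped as '|' (identifier values cannot contain those characters directly). The round-trip, with a hypothetical stored value:

url = 'http|//example.com/a|b'
url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
# -> 'http://example.com/a,b'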
Example #8
def test_identify_plugin(
        name,
        tests,
        modify_plugin=lambda plugin: None,  # {{{
        fail_missing_meta=True):
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    '''
    plugin = None
    for x in all_metadata_plugins():
        if x.name == name and 'identify' in x.capabilities:
            plugin = x
            break
    modify_plugin(plugin)
    prints('Testing the identify function of', plugin.name)
    prints('Using extra headers:', plugin.browser.addheaders)

    tdir, lf, log, abort = init_test(plugin.name)
    prints('Log saved to', lf)

    times = []
    for kwargs, test_funcs in tests:
        log('')
        log('#' * 80)
        log('### Running test with:', kwargs)
        log('#' * 80)
        prints('Running test with:', kwargs)
        rq = Queue()
        args = (log, rq, abort)
        start_time = time.time()
        plugin.running_a_test = True
        try:
            err = plugin.identify(*args, **kwargs)
        finally:
            plugin.running_a_test = False
        total_time = time.time() - start_time
        times.append(total_time)
        if err is not None:
            prints('identify returned an error for args', args)
            prints(err)
            break

        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        results.sort(key=plugin.identify_results_keygen(
            title=kwargs.get('title', None),
            authors=kwargs.get('authors', None),
            identifiers=kwargs.get('identifiers', {})))

        for i, mi in enumerate(results):
            prints('*' * 30, 'Relevance:', i, '*' * 30)
            if mi.rating:
                mi.rating *= 2
            prints(mi)
            prints('\nCached cover URL    :',
                   plugin.get_cached_cover_url(mi.identifiers))
            prints('*' * 75, '\n\n')

        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            log.close()
            dump_log(lf)
            raise SystemExit(1)

        good = [x for x in possibles if plugin.test_fields(x) is None]
        if not good:
            prints('Failed to find', plugin.test_fields(possibles[0]))
            if fail_missing_meta:
                raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        if 'cover' in plugin.capabilities:
            rq = Queue()
            mi = results[0]
            plugin.download_cover(log,
                                  rq,
                                  abort,
                                  title=mi.title,
                                  authors=mi.authors,
                                  identifiers=mi.identifiers)
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            if not results and fail_missing_meta:
                prints('Cover download failed')
                raise SystemExit(1)
            elif results:
                cdata = results[0]
                cover = os.path.join(
                    tdir,
                    plugin.name.replace(' ', '') + '-%s-cover.jpg' %
                    sanitize_file_name(mi.title.replace(' ', '_')))
                with open(cover, 'wb') as f:
                    f.write(cdata[-1])

                prints('Cover downloaded to:', cover)

                if len(cdata[-1]) < 10240:
                    prints('Downloaded cover too small')
                    raise SystemExit(1)

    prints('Average time per query', sum(times) / len(times))

    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
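
A hypothetical test run; title_test and authors_test are the companion helpers in calibre.ebooks.metadata.sources.test that build the test_funcs callables checking a returned Metadata object:

from calibre.ebooks.metadata.sources.test import (
    test_identify_plugin, title_test, authors_test)

test_identify_plugin('Google', [
    (
        {'title': 'Great Expectations', 'authors': ['Charles Dickens']},
        [title_test('Great Expectations', exact=False),
         authors_test(['Charles Dickens'])],
    ),
])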
Example #9
def test_identify_plugin(name, tests, modify_plugin=lambda plugin:None,
        fail_missing_meta=True):  # {{{
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    '''
    plugin = None
    for x in all_metadata_plugins():
        if x.name == name and 'identify' in x.capabilities:
            plugin = x
            break
    modify_plugin(plugin)
    prints('Testing the identify function of', plugin.name)
    prints('Using extra headers:', plugin.browser.addheaders)

    tdir, lf, log, abort = init_test(plugin.name)
    prints('Log saved to', lf)

    times = []
    for kwargs, test_funcs in tests:
        prints('Running test with:', kwargs)
        rq = Queue()
        args = (log, rq, abort)
        start_time = time.time()
        plugin.running_a_test = True
        try:
            err = plugin.identify(*args, **kwargs)
        finally:
            plugin.running_a_test = False
        total_time = time.time() - start_time
        times.append(total_time)
        if err is not None:
            prints('identify returned an error for args', args)
            prints(err)
            break

        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        results.sort(key=plugin.identify_results_keygen(
            title=kwargs.get('title', None), authors=kwargs.get('authors',
                None), identifiers=kwargs.get('identifiers', {})))

        for i, mi in enumerate(results):
            prints('*'*30, 'Relevance:', i, '*'*30)
            prints(mi)
            prints('\nCached cover URL    :',
                    plugin.get_cached_cover_url(mi.identifiers))
            prints('*'*75, '\n\n')

        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)

        good = [x for x in possibles if plugin.test_fields(x) is
                None]
        if not good:
            prints('Failed to find', plugin.test_fields(possibles[0]))
            if fail_missing_meta:
                raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        if 'cover' in plugin.capabilities:
            rq = Queue()
            mi = results[0]
            plugin.download_cover(log, rq, abort, title=mi.title,
                    authors=mi.authors, identifiers=mi.identifiers)
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            if not results and fail_missing_meta:
                prints('Cover download failed')
                raise SystemExit(1)
            elif results:
                cdata = results[0]
                cover = os.path.join(tdir, plugin.name.replace(' ',
                    '')+'-%s-cover.jpg'%sanitize_file_name2(mi.title.replace(' ',
                        '_')))
                with open(cover, 'wb') as f:
                    f.write(cdata[-1])

                prints('Cover downloaded to:', cover)

                if len(cdata[-1]) < 10240:
                    prints('Downloaded cover too small')
                    raise SystemExit(1)

    prints('Average time per query', sum(times)/len(times))

    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
Example #10
def urls_from_identifiers(identifiers, sort_results=False):  # {{{
    identifiers = {k.lower(): v for k, v in iteritems(identifiers)}
    ans = []
    keys_left = set(identifiers)

    def add(name, k, val, url):
        ans.append((name, k, val, url))
        keys_left.discard(k)

    rules = msprefs['id_link_rules']
    if rules:
        formatter = EvalFormatter()
        for k, val in iteritems(identifiers):
            val = val.replace('|', ',')
            vals = {
                'id':
                unicode_type(
                    quote(val if isinstance(val, bytes) else val.encode('utf-8'
                                                                        ))),
                'id_unquoted':
                str(val),
            }
            items = rules.get(k) or ()
            for name, template in items:
                try:
                    url = formatter.safe_format(template, vals, '', vals)
                except Exception:
                    import traceback
                    traceback.format_exc()
                    continue
                add(name, k, val, url)
    for plugin in all_metadata_plugins():
        try:
            for id_type, id_val, url in plugin.get_book_urls(identifiers):
                add(plugin.get_book_url_name(id_type, id_val, url), id_type,
                    id_val, url)
        except Exception:
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        add(isbn, 'isbn', isbn, 'https://www.worldcat.org/isbn/' + isbn)
    doi = identifiers.get('doi', None)
    if doi:
        add('DOI', 'doi', doi, 'https://dx.doi.org/' + doi)
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        add('arXiv', 'arxiv', arxiv, 'https://arxiv.org/abs/' + arxiv)
    oclc = identifiers.get('oclc', None)
    if oclc:
        add('OCLC', 'oclc', oclc, 'https://www.worldcat.org/oclc/' + oclc)
    issn = check_issn(identifiers.get('issn', None))
    if issn:
        add(issn, 'issn', issn, 'https://www.worldcat.org/issn/' + issn)
    q = {'http', 'https', 'file'}
    for k, url in iteritems(identifiers):
        if url and re.match(r'ur[il]\d*$', k) is not None:
            url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
            if url.partition(':')[0].lower() in q:
                parts = urlparse(url)
                name = parts.netloc or parts.path
                add(name, k, url, url)
    for k in tuple(keys_left):
        val = identifiers.get(k)
        if val:
            url = val[:8].replace('|', ':') + val[8:].replace('|', ',')
            if url.partition(':')[0].lower() in q:
                parts = urlparse(url)
                name = parts.netloc or parts.path
                add(name, k, url, url)
    if sort_results:

        def url_key(x):
            return primary_sort_key(str(x[0]))

        ans = sorted(ans, key=url_key)
    return ans
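
A hypothetical call exercising both additions in this latest version: sort_results=True orders the tuples by display name, and the keys_left fallback turns any remaining identifier whose value looks like an http/https/file URL into a link as well:

urls_from_identifiers(
    {'isbn': '9780316769488', 'myshop': 'https|//shop.example.com/123'},
    sort_results=True)
# 'myshop' is not a recognised identifier type, but its value survives
# the '|' round-trip as https://shop.example.com/123, so it is returned
# as ('shop.example.com', 'myshop', <url>, <url>).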