def initialize(self):
    self.beginResetModel()
    self.plugins = list(all_metadata_plugins())
    self.plugins.sort(key=attrgetter('name'))
    self.enabled_overrides = {}
    self.cover_overrides = {}
    self.endResetModel()
def option_parser():
    parser = OptionParser(_('''\
%prog [options]

Fetch book metadata from online sources. You must specify at least one of
title, authors or ISBN.
'''
    ))
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--authors', help=_('Book author(s)'))
    parser.add_option('-i', '--isbn', help=_('Book ISBN'))
    parser.add_option('-I', '--identifier', action='append', default=[], help=_(
        'Identifiers such as ASIN/goodreads id etc. Can be specified multiple times for multiple identifiers.'
        ' For example: ') + '--identifier asin:B0082BAJA0')
    parser.add_option('-v', '--verbose', default=False, action='store_true',
                      help=_('Print the log to the console (stderr)'))
    parser.add_option('-o', '--opf', action='store_true', default=False,
                      help=_('Output the metadata in OPF format instead of human readable text.'))
    parser.add_option('-c', '--cover',
                      help=_('Specify a filename. The cover, if available, will be saved to it.'
                             ' Without this option, no cover will be downloaded.'))
    parser.add_option('-d', '--timeout', default='30',
                      help=_('Timeout in seconds. Default is 30'))
    parser.add_option('-p', '--allowed-plugin', action='append', default=[],
                      help=_('Specify the name of a metadata download plugin to use.'
                             ' By default, all metadata plugins will be used.'
                             ' Can be specified multiple times for multiple plugins.'
                             ' All plugin names: {}').format(
                                 ', '.join(p.name for p in all_metadata_plugins())))
    return parser
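# Usage sketch (assumption, not part of the original source): one way the
# parser above could be driven from a command-line entry point. Only option
# parsing and the "at least one of title/authors/ISBN/identifier" check are
# shown; the actual metadata fetch is omitted.
if __name__ == '__main__':
    import sys
    parser = option_parser()
    opts, args = parser.parse_args(sys.argv[1:])
    if not (opts.title or opts.authors or opts.isbn or opts.identifier):
        parser.print_help()
        raise SystemExit(1)
    # opts.timeout is a string (default '30'); convert before using it
    timeout = float(opts.timeout)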
def urls_from_identifiers(identifiers):  # {{{
    identifiers = dict([(k.lower(), v) for k, v in identifiers.iteritems()])
    ans = []
    for plugin in all_metadata_plugins():
        try:
            id_type, id_val, url = plugin.get_book_url(identifiers)
            ans.append((plugin.get_book_url_name(id_type, id_val, url),
                        id_type, id_val, url))
        except:
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        ans.append((isbn, 'isbn', isbn,
                    'http://www.worldcat.org/isbn/'+isbn))
    doi = identifiers.get('doi', None)
    if doi:
        ans.append(('DOI', 'doi', doi,
                    'http://dx.doi.org/'+doi))
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        ans.append(('arXiv', 'arxiv', arxiv,
                    'http://arxiv.org/abs/'+arxiv))
    oclc = identifiers.get('oclc', None)
    if oclc:
        ans.append(('OCLC', 'oclc', oclc,
                    'http://www.worldcat.org/oclc/'+oclc))
    url = identifiers.get('uri', None)
    if url is None:
        url = identifiers.get('url', None)
    if url and url.startswith('http'):
        url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
        parts = urlparse(url)
        name = parts.netloc
        ans.append((name, 'url', url, url))
    return ans
def no_tags_from_other_plugins():
    from calibre.customize.ui import all_metadata_plugins
    for plugin in all_metadata_plugins():
        if plugin.name == GoodreadsMoreTags.name:
            continue
        if 'ignore_fields' not in plugin.prefs:
            plugin.prefs['ignore_fields'] = []
        plugin.prefs['ignore_fields'].append('tags')
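# Illustrative sketch (assumption): after no_tags_from_other_plugins() runs,
# every metadata source except GoodreadsMoreTags carries 'tags' in its
# ignore_fields preference, so their tags are dropped when results are merged.
from calibre.customize.ui import all_metadata_plugins

no_tags_from_other_plugins()
for plugin in all_metadata_plugins():
    if plugin.name != GoodreadsMoreTags.name:
        assert 'tags' in plugin.prefs['ignore_fields']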
def initialize(self):
    fields = set()
    for p in all_metadata_plugins():
        fields |= p.touched_fields
    self.fields = []
    for x in fields:
        if not x.startswith('identifier:') and x not in self.exclude:
            self.fields.append(x)
    self.fields.sort(key=lambda x: self.descs.get(x, x))
    self.reset()
def urls_from_identifiers(identifiers):  # {{{
    identifiers = {k.lower(): v for k, v in identifiers.iteritems()}
    ans = []
    rules = msprefs['id_link_rules']
    if rules:
        formatter = EvalFormatter()
        for k, val in identifiers.iteritems():
            vals = {'id': quote(val if isinstance(val, bytes) else val.encode('utf-8')).decode('ascii')}
            items = rules.get(k) or ()
            for name, template in items:
                try:
                    url = formatter.safe_format(template, vals, '', vals)
                except Exception:
                    import traceback
                    traceback.format_exc()
                    continue
                ans.append((name, k, val, url))
    for plugin in all_metadata_plugins():
        try:
            for id_type, id_val, url in plugin.get_book_urls(identifiers):
                ans.append((plugin.get_book_url_name(id_type, id_val, url),
                            id_type, id_val, url))
        except:
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        ans.append((isbn, 'isbn', isbn,
                    'http://www.worldcat.org/isbn/'+isbn))
    doi = identifiers.get('doi', None)
    if doi:
        ans.append(('DOI', 'doi', doi,
                    'http://dx.doi.org/'+doi))
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        ans.append(('arXiv', 'arxiv', arxiv,
                    'http://arxiv.org/abs/'+arxiv))
    oclc = identifiers.get('oclc', None)
    if oclc:
        ans.append(('OCLC', 'oclc', oclc,
                    'http://www.worldcat.org/oclc/'+oclc))
    issn = check_issn(identifiers.get('issn', None))
    if issn:
        ans.append((issn, 'issn', issn,
                    'http://www.worldcat.org/issn/'+issn))
    for k, url in identifiers.iteritems():
        if url and re.match(r'ur[il]\d*$', k) is not None:
            url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
            if url.partition(':')[0].lower() in {'http', 'file', 'https'}:
                parts = urlparse(url)
                name = parts.netloc or parts.path
                ans.append((name, k, url, url))
    return ans
def urls_from_identifiers(identifiers):  # {{{
    identifiers = {k.lower(): v for k, v in identifiers.iteritems()}
    ans = []
    for plugin in all_metadata_plugins():
        try:
            for id_type, id_val, url in plugin.get_book_urls(identifiers):
                ans.append((plugin.get_book_url_name(id_type, id_val, url),
                            id_type, id_val, url))
        except:
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        ans.append((isbn, 'isbn', isbn,
                    'http://www.worldcat.org/isbn/'+isbn))
    doi = identifiers.get('doi', None)
    if doi:
        ans.append(('DOI', 'doi', doi,
                    'http://dx.doi.org/'+doi))
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        ans.append(('arXiv', 'arxiv', arxiv,
                    'http://arxiv.org/abs/'+arxiv))
    oclc = identifiers.get('oclc', None)
    if oclc:
        ans.append(('OCLC', 'oclc', oclc,
                    'http://www.worldcat.org/oclc/'+oclc))
    issn = check_issn(identifiers.get('issn', None))
    if issn:
        ans.append((issn, 'issn', issn,
                    'http://www.worldcat.org/issn/'+issn))
    for k, url in identifiers.iteritems():
        if url and re.match(r'ur[il]\d*$', k) is not None and \
                url.partition(':')[0].lower() in {'http', 'file', 'https'}:
            url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
            parts = urlparse(url)
            name = parts.netloc or parts.path
            ans.append((name, k, url, url))
    return ans
def test_identify_plugin(name, tests, modify_plugin=lambda plugin: None,  # {{{
                         fail_missing_meta=True):
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    '''
    plugin = None
    for x in all_metadata_plugins():
        if x.name == name and 'identify' in x.capabilities:
            plugin = x
            break
    modify_plugin(plugin)
    prints('Testing the identify function of', plugin.name)
    prints('Using extra headers:', plugin.browser.addheaders)

    tdir, lf, log, abort = init_test(plugin.name)
    prints('Log saved to', lf)

    times = []

    for kwargs, test_funcs in tests:
        log('')
        log('#' * 80)
        log('### Running test with:', kwargs)
        log('#' * 80)
        prints('Running test with:', kwargs)
        rq = Queue()
        args = (log, rq, abort)
        start_time = time.time()
        plugin.running_a_test = True
        try:
            err = plugin.identify(*args, **kwargs)
        finally:
            plugin.running_a_test = False
        total_time = time.time() - start_time
        times.append(total_time)
        if err is not None:
            prints('identify returned an error for args', args)
            prints(err)
            break

        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        results.sort(key=plugin.identify_results_keygen(
            title=kwargs.get('title', None),
            authors=kwargs.get('authors', None),
            identifiers=kwargs.get('identifiers', {})))

        for i, mi in enumerate(results):
            prints('*' * 30, 'Relevance:', i, '*' * 30)
            if mi.rating:
                mi.rating *= 2
            prints(mi)
            prints('\nCached cover URL :',
                   plugin.get_cached_cover_url(mi.identifiers))
            prints('*' * 75, '\n\n')

        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            log.close()
            dump_log(lf)
            raise SystemExit(1)

        good = [x for x in possibles if plugin.test_fields(x) is None]
        if not good:
            prints('Failed to find', plugin.test_fields(possibles[0]))
            if fail_missing_meta:
                raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        if 'cover' in plugin.capabilities:
            rq = Queue()
            mi = results[0]
            plugin.download_cover(log, rq, abort, title=mi.title,
                                  authors=mi.authors,
                                  identifiers=mi.identifiers)
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            if not results and fail_missing_meta:
                prints('Cover download failed')
                raise SystemExit(1)
            elif results:
                cdata = results[0]
                cover = os.path.join(
                    tdir, plugin.name.replace(' ', '') + '-%s-cover.jpg' %
                    sanitize_file_name(mi.title.replace(' ', '_')))
                with open(cover, 'wb') as f:
                    f.write(cdata[-1])
                prints('Cover downloaded to:', cover)
                if len(cdata[-1]) < 10240:
                    prints('Downloaded cover too small')
                    raise SystemExit(1)

    prints('Average time per query', sum(times) / len(times))

    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
def test_identify_plugin(name, tests, modify_plugin=lambda plugin: None,
                         fail_missing_meta=True):  # {{{
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    '''
    plugin = None
    for x in all_metadata_plugins():
        if x.name == name and 'identify' in x.capabilities:
            plugin = x
            break
    modify_plugin(plugin)
    prints('Testing the identify function of', plugin.name)
    prints('Using extra headers:', plugin.browser.addheaders)

    tdir, lf, log, abort = init_test(plugin.name)
    prints('Log saved to', lf)

    times = []
    for kwargs, test_funcs in tests:
        prints('Running test with:', kwargs)
        rq = Queue()
        args = (log, rq, abort)
        start_time = time.time()
        plugin.running_a_test = True
        try:
            err = plugin.identify(*args, **kwargs)
        finally:
            plugin.running_a_test = False
        total_time = time.time() - start_time
        times.append(total_time)
        if err is not None:
            prints('identify returned an error for args', args)
            prints(err)
            break

        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        results.sort(key=plugin.identify_results_keygen(
            title=kwargs.get('title', None),
            authors=kwargs.get('authors', None),
            identifiers=kwargs.get('identifiers', {})))

        for i, mi in enumerate(results):
            prints('*'*30, 'Relevance:', i, '*'*30)
            prints(mi)
            prints('\nCached cover URL :',
                   plugin.get_cached_cover_url(mi.identifiers))
            prints('*'*75, '\n\n')

        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)

        good = [x for x in possibles if plugin.test_fields(x) is None]
        if not good:
            prints('Failed to find', plugin.test_fields(possibles[0]))
            if fail_missing_meta:
                raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        if 'cover' in plugin.capabilities:
            rq = Queue()
            mi = results[0]
            plugin.download_cover(log, rq, abort, title=mi.title,
                                  authors=mi.authors,
                                  identifiers=mi.identifiers)
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            if not results and fail_missing_meta:
                prints('Cover download failed')
                raise SystemExit(1)
            elif results:
                cdata = results[0]
                cover = os.path.join(
                    tdir, plugin.name.replace(' ', '') + '-%s-cover.jpg' %
                    sanitize_file_name2(mi.title.replace(' ', '_')))
                with open(cover, 'wb') as f:
                    f.write(cdata[-1])
                prints('Cover downloaded to:', cover)
                if len(cdata[-1]) < 10240:
                    prints('Downloaded cover too small')
                    raise SystemExit(1)

    prints('Average time per query', sum(times)/len(times))

    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
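# Usage sketch (assumption, not part of the original source): a typical call
# to test_identify_plugin(), assuming the title_test/authors_test helpers that
# usually accompany it in calibre.ebooks.metadata.sources.test. The plugin
# name and book data below are illustrative only.
if __name__ == '__main__':
    from calibre.ebooks.metadata.sources.test import (
        test_identify_plugin, title_test, authors_test)

    test_identify_plugin('Google', [
        (   # one (kwargs, test_funcs) pair
            {'identifiers': {'isbn': '9780385340588'}},
            [title_test('61 Hours', exact=True),
             authors_test(['Lee Child'])],
        ),
    ])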
def urls_from_identifiers(identifiers, sort_results=False):  # {{{
    identifiers = {k.lower(): v for k, v in iteritems(identifiers)}
    ans = []
    keys_left = set(identifiers)

    def add(name, k, val, url):
        ans.append((name, k, val, url))
        keys_left.discard(k)

    rules = msprefs['id_link_rules']
    if rules:
        formatter = EvalFormatter()
        for k, val in iteritems(identifiers):
            val = val.replace('|', ',')
            vals = {
                'id': unicode_type(quote(val if isinstance(val, bytes) else val.encode('utf-8'))),
                'id_unquoted': str(val),
            }
            items = rules.get(k) or ()
            for name, template in items:
                try:
                    url = formatter.safe_format(template, vals, '', vals)
                except Exception:
                    import traceback
                    traceback.format_exc()
                    continue
                add(name, k, val, url)
    for plugin in all_metadata_plugins():
        try:
            for id_type, id_val, url in plugin.get_book_urls(identifiers):
                add(plugin.get_book_url_name(id_type, id_val, url),
                    id_type, id_val, url)
        except Exception:
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        add(isbn, 'isbn', isbn,
            'https://www.worldcat.org/isbn/' + isbn)
    doi = identifiers.get('doi', None)
    if doi:
        add('DOI', 'doi', doi,
            'https://dx.doi.org/' + doi)
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        add('arXiv', 'arxiv', arxiv,
            'https://arxiv.org/abs/' + arxiv)
    oclc = identifiers.get('oclc', None)
    if oclc:
        add('OCLC', 'oclc', oclc,
            'https://www.worldcat.org/oclc/' + oclc)
    issn = check_issn(identifiers.get('issn', None))
    if issn:
        add(issn, 'issn', issn,
            'https://www.worldcat.org/issn/' + issn)
    q = {'http', 'https', 'file'}
    for k, url in iteritems(identifiers):
        if url and re.match(r'ur[il]\d*$', k) is not None:
            url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
            if url.partition(':')[0].lower() in q:
                parts = urlparse(url)
                name = parts.netloc or parts.path
                add(name, k, url, url)
    for k in tuple(keys_left):
        val = identifiers.get(k)
        if val:
            url = val[:8].replace('|', ':') + val[8:].replace('|', ',')
            if url.partition(':')[0].lower() in q:
                parts = urlparse(url)
                name = parts.netloc or parts.path
                add(name, k, url, url)
    if sort_results:
        def url_key(x):
            return primary_sort_key(str(x[0]))
        ans = sorted(ans, key=url_key)
    return ans
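# Illustrative sketch (assumption): the function above returns a list of
# (display_name, id_type, id_value, url) 4-tuples. With only the built-in
# rules, an ISBN and a DOI would produce entries like those shown in the
# comments; plugin-provided URLs and id_link_rules would add more.
urls = urls_from_identifiers({'isbn': '9780316769488', 'doi': '10.1000/182'})
for name, id_type, id_val, url in urls:
    # e.g. ('9780316769488', 'isbn', '9780316769488',
    #       'https://www.worldcat.org/isbn/9780316769488')
    # e.g. ('DOI', 'doi', '10.1000/182', 'https://dx.doi.org/10.1000/182')
    print(name, id_type, id_val, url)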