def merge_result(oldmi, newmi, ensure_fields=None):
    dummy = Metadata(_('Unknown'))
    for f in msprefs['ignore_fields']:
        if ':' in f or (ensure_fields and f in ensure_fields):
            continue
        setattr(newmi, f, getattr(dummy, f))
    fields = set()
    for plugin in metadata_plugins(['identify']):
        fields |= plugin.touched_fields

    def is_equal(x, y):
        if hasattr(x, 'tzinfo'):
            x = as_utc(x)
        if hasattr(y, 'tzinfo'):
            y = as_utc(y)
        return x == y

    for f in fields:
        # Optimize so that set_metadata does not have to do extra work later
        if not f.startswith('identifier:'):
            if (not newmi.is_null(f) and
                    is_equal(getattr(newmi, f), getattr(oldmi, f))):
                setattr(newmi, f, getattr(dummy, f))

    return newmi
def load_caches(dump):
    from calibre.customize.ui import metadata_plugins
    plugins = list(metadata_plugins(['identify']))
    for p in plugins:
        cache = dump.get(p.name, None)
        if cache:
            p.load_caches(cache)
def get_cached_cover_urls(mi):
    from calibre.customize.ui import metadata_plugins
    plugins = list(metadata_plugins(['identify']))
    for p in plugins:
        url = p.get_cached_cover_url(mi.identifiers)
        if url:
            yield (p, url)
def __init__(self, log, parent=None):
    QWidget.__init__(self, parent)
    self.log = log
    self.abort = Event()
    self.caches = {}

    self.l = l = QGridLayout()
    self.setLayout(l)

    names = [
        '<b>' + p.name + '</b>'
        for p in metadata_plugins(['identify']) if p.is_configured()
    ]

    self.top = QLabel('<p>' + _('calibre is downloading metadata from: ') +
                      ', '.join(names))
    self.top.setWordWrap(True)
    l.addWidget(self.top, 0, 0)

    self.results_view = ResultsView(self)
    self.results_view.book_selected.connect(self.emit_book_selected)
    self.get_result = self.results_view.get_result
    l.addWidget(self.results_view, 1, 0)

    self.comments_view = Comments(self)
    l.addWidget(self.comments_view, 1, 1)

    self.results_view.show_details_signal.connect(self.comments_view.show_data)

    self.query = QLabel('download starting...')
    self.query.setWordWrap(True)
    l.addWidget(self.query, 2, 0, 1, 2)

    self.comments_view.show_wait()
def reset_covers(self, do_reset=True):
    self.covers = [self.get_item(_("Current cover"), self.cc)]
    self.plugin_map = {}
    for i, plugin in enumerate(metadata_plugins(["cover"])):
        self.covers.append((plugin.name + "\n" + _("Searching..."),
                            self.blank, None, True))
        self.plugin_map[plugin] = [i + 1]

    if do_reset:
        self.beginResetModel(), self.endResetModel()
def reset_covers(self, do_reset=True):
    self.covers = [self.get_item(_('Current cover'), self.cc)]
    self.plugin_map = {}
    for i, plugin in enumerate(metadata_plugins(['cover'])):
        self.covers.append((plugin.name + '\n' + _('Searching...'),
                            QVariant(self.blank), None, True))
        self.plugin_map[plugin] = [i + 1]

    if do_reset:
        self.reset()
def reset_covers(self, do_reset=True):
    self.covers = [self.get_item(_('Current cover'), self.cc)]
    self.plugin_map = {}
    for i, plugin in enumerate(metadata_plugins(['cover'])):
        self.covers.append((plugin.name+'\n'+_('Searching...'),
                            QVariant(self.blank), None, True))
        self.plugin_map[plugin] = i+1

    if do_reset:
        self.reset()
def fake_run(self):
    images = ['donate.png', 'config.png', 'column.png', 'eject.png']
    time.sleep(2)
    for pl, im in zip(metadata_plugins(['cover']), images):
        self.rq.put((pl.name, 1, 1, 'png', I(im, data=True)))
def __init__(self, log, parent=None):
    QWidget.__init__(self, parent)
    self.log = log
    self.abort = Event()
    self.caches = {}

    self.l = l = QGridLayout()
    self.setLayout(l)

    names = [
        '<b>' + p.name + '</b>'
        for p in metadata_plugins(['identify']) if p.is_configured()
    ]

    self.top = QLabel('<p>' + _('calibre is downloading metadata from: ') +
                      ', '.join(names))
    self.top.setWordWrap(True)
    l.addWidget(self.top, 0, 0)

    self.results_view = ResultsView(self)
    self.results_view.book_selected.connect(self.emit_book_selected)
    self.get_result = self.results_view.get_result
    l.addWidget(self.results_view, 1, 0)

    self.comments_view = Comments(self)
    l.addWidget(self.comments_view, 1, 1)

    self.results_view.show_details_signal.connect(self.comments_view.show_data)

    self.query = QLabel('download starting...')
    f = self.query.font()
    f.setPointSize(f.pointSize() - 2)
    self.query.setFont(f)
    self.query.setWordWrap(True)
    l.addWidget(self.query, 2, 0, 1, 2)

    self.comments_view.show_data('<h2>' + _('Please wait') +
            '<br><span id="dots">.</span></h2>' +
            '''
            <script type="text/javascript">
            window.onload=function(){
                var dotspan = document.getElementById('dots');
                window.setInterval(function(){
                    if(dotspan.textContent == '............'){
                        dotspan.textContent = '.';
                    }
                    else{
                        dotspan.textContent += '.';
                    }
                }, 400);
            }
            </script>
            ''')
def __init__(self, log, parent=None):
    QWidget.__init__(self, parent)
    self.log = log
    self.abort = Event()
    self.caches = {}

    self.l = l = QVBoxLayout(self)

    names = [
        '<b>' + p.name + '</b>'
        for p in metadata_plugins(['identify']) if p.is_configured()
    ]

    self.top = QLabel('<p>' + _('calibre is downloading metadata from: ') +
                      ', '.join(names))
    self.top.setWordWrap(True)
    l.addWidget(self.top)

    self.splitter = s = QSplitter(self)
    s.setChildrenCollapsible(False)
    l.addWidget(s, 100)

    self.results_view = ResultsView(self)
    self.results_view.book_selected.connect(self.emit_book_selected)
    self.get_result = self.results_view.get_result
    s.addWidget(self.results_view)

    self.comments_view = Comments(self)
    s.addWidget(self.comments_view)
    s.setStretchFactor(0, 2)
    s.setStretchFactor(1, 1)

    self.results_view.show_details_signal.connect(self.comments_view.show_data)

    self.query = QLabel('download starting...')
    self.query.setWordWrap(True)
    self.query.setTextFormat(Qt.TextFormat.PlainText)
    l.addWidget(self.query)

    self.comments_view.show_wait()

    state = gprefs.get('metadata-download-identify-widget-splitter-state')
    if state is not None:
        s.restoreState(state)
def dump_caches():
    from calibre.customize.ui import metadata_plugins
    return {p.name: p.dump_caches() for p in metadata_plugins(['identify'])}
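# A minimal sketch of pairing dump_caches() with load_caches() above to carry
# per-plugin caches across a worker boundary. Only the two helpers are from the
# source; how the dict is transported between processes is an assumption.
def example_cache_round_trip():
    dump = dump_caches()   # {plugin.name: plugin-specific cache dict}
    # ... send `dump` back from the worker, e.g. over its result channel ...
    load_caches(dump)      # restore the caches into the live plugin instances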
def test_identify_plugin(name, tests, modify_plugin=lambda plugin: None,
        fail_missing_meta=True):  # {{{
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    '''
    plugin = None
    for x in metadata_plugins(['identify']):
        if x.name == name:
            plugin = x
            break
    modify_plugin(plugin)
    prints('Testing the identify function of', plugin.name)
    prints('Using extra headers:', plugin.browser.addheaders)

    tdir, lf, log, abort = init_test(plugin.name)
    prints('Log saved to', lf)

    times = []
    for kwargs, test_funcs in tests:
        prints('Running test with:', kwargs)
        rq = Queue()
        args = (log, rq, abort)
        start_time = time.time()
        plugin.running_a_test = True
        try:
            err = plugin.identify(*args, **kwargs)
        finally:
            plugin.running_a_test = False
        total_time = time.time() - start_time
        times.append(total_time)
        if err is not None:
            prints('identify returned an error for args', args)
            prints(err)
            break

        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        results.sort(key=plugin.identify_results_keygen(
            title=kwargs.get('title', None),
            authors=kwargs.get('authors', None),
            identifiers=kwargs.get('identifiers', {})))

        for i, mi in enumerate(results):
            prints('*' * 30, 'Relevance:', i, '*' * 30)
            prints(mi)
            prints('\nCached cover URL :',
                   plugin.get_cached_cover_url(mi.identifiers))
            prints('*' * 75, '\n\n')

        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)

        good = [x for x in possibles if plugin.test_fields(x) is None]
        if not good:
            prints('Failed to find', plugin.test_fields(possibles[0]))
            if fail_missing_meta:
                raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        if 'cover' in plugin.capabilities:
            rq = Queue()
            mi = results[0]
            plugin.download_cover(log, rq, abort, title=mi.title,
                    authors=mi.authors, identifiers=mi.identifiers)
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            if not results and fail_missing_meta:
                prints('Cover download failed')
                raise SystemExit(1)
            elif results:
                cdata = results[0]
                cover = os.path.join(tdir, plugin.name.replace(' ', '') +
                        '-%s-cover.jpg' % sanitize_file_name2(
                            mi.title.replace(' ', '_')))
                with open(cover, 'wb') as f:
                    f.write(cdata[-1])
                prints('Cover downloaded to:', cover)
                if len(cdata[-1]) < 10240:
                    prints('Downloaded cover too small')
                    raise SystemExit(1)

    prints('Average time per query', sum(times) / len(times))

    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
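# A hedged sketch of calling test_identify_plugin() with the `tests` structure
# described in its docstring: a list of (kwargs, test_funcs) pairs. The plugin
# name 'Some Source' and the expected title/author below are placeholders, not
# values taken from the source.
def title_is(expected):
    return lambda mi: mi.title == expected

test_identify_plugin('Some Source', [
    (
        {'title': 'A Known Book', 'authors': ['An Author']},
        [title_is('A Known Book')],
    ),
])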
def run_download(log, results, abort,
        title=None, authors=None, identifiers={}, timeout=30,
        get_best_cover=False):
    '''
    Run the cover download, putting results into the queue :param:`results`.

    Each result is a tuple of the form:

        (plugin, width, height, fmt, bytes)
    '''
    if title == _('Unknown'):
        title = None
    if authors == [_('Unknown')]:
        authors = None

    plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]

    rq = Queue()
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq,
                      get_best_cover=get_best_cover) for p in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    wait_time = msprefs['wait_after_first_cover_result']
    found_results = {}
    start_time = time.time()  # Use a global timeout to workaround misbehaving plugins that hang
    while time.time() - start_time < 301:
        time.sleep(0.1)

        try:
            x = rq.get_nowait()
            result = process_result(log, x)
            if result is not None:
                results.put(result)
                found_results[result[0]] = result
                if first_result_at is None:  # note when the first result arrived
                    first_result_at = time.time()
        except Empty:
            pass

        if not is_worker_alive(workers):
            break

        if first_result_at is not None and time.time() - first_result_at > wait_time:
            log('Not waiting for any more results')
            abort.set()

        if abort.is_set():
            break

    while True:
        try:
            x = rq.get_nowait()
            result = process_result(log, x)
            if result is not None:
                results.put(result)
                found_results[result[0]] = result
        except Empty:
            break

    for w in workers:
        wlog = w.buf.getvalue().strip()
        log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30)
        log('Request extra headers:', w.plugin.browser.addheaders)
        if w.plugin in found_results:
            result = found_results[w.plugin]
            log('Downloaded cover:', '%dx%d'%(result[1], result[2]))
        else:
            log('Failed to download valid cover')
        if w.time_spent is None:
            log('Download aborted')
        else:
            log('Took', w.time_spent, 'seconds')
        if wlog:
            log(wlog)
        log('\n'+'*'*80)
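# A hedged sketch of a caller draining the `results` queue that run_download()
# fills; only the (plugin, width, height, fmt, bytes) tuple layout comes from
# the docstring above. The title/authors and the use of the stdlib queue/Event
# here are assumptions (under Python 2 the queue module is named Queue).
from queue import Empty, Queue
from threading import Event

def collect_covers(log):
    results, abort = Queue(), Event()
    run_download(log, results, abort, title='Some Title', authors=['Some Author'])
    covers = []
    while True:
        try:
            plugin, width, height, fmt, data = results.get_nowait()
        except Empty:
            break
        covers.append((plugin.name, width, height, fmt, len(data)))
    return covers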
def identify(log, abort,  # {{{
        title=None, authors=None, identifiers={}, timeout=30,
        allowed_plugins=None):
    if title == _('Unknown'):
        title = None
    if authors == [_('Unknown')]:
        authors = None
    start_time = time.time()

    plugins = [p for p in metadata_plugins(['identify'])
               if p.is_configured() and
               (allowed_plugins is None or p.name in allowed_plugins)]

    kwargs = {
        'title': title,
        'authors': authors,
        'identifiers': identifiers,
        'timeout': timeout,
    }

    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join(['%s %s' % (p.name, p.version) for p in plugins]))
    log('The log from individual plugins is below')

    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    results = {}
    for p in plugins:
        results[p] = []
    logs = dict([(w.plugin, w.buf) for w in workers])

    def get_results():
        found = False
        for w in workers:
            try:
                result = w.rq.get_nowait()
            except Empty:
                pass
            else:
                results[w.plugin].append(result)
                found = True
        return found

    wait_time = msprefs['wait_after_first_identify_result']
    while True:
        time.sleep(0.2)

        if get_results() and first_result_at is None:
            first_result_at = time.time()

        if not is_worker_alive(workers):
            break

        if (first_result_at is not None and
                time.time() - first_result_at > wait_time):
            log.warn('Not waiting any longer for more results. Still running'
                     ' sources:')
            for worker in workers:
                if worker.is_alive():
                    log.debug('\t' + worker.name)
            abort.set()
            break

    while not abort.is_set() and get_results():
        pass

    sort_kwargs = dict(kwargs)
    for k in list(sort_kwargs.iterkeys()):
        if k not in ('title', 'authors', 'identifiers'):
            sort_kwargs.pop(k)

    longest, lp = -1, ''
    for plugin, presults in results.iteritems():
        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))

        # Throw away lower priority results from the same source that have
        # exactly the same title and authors as a higher priority result
        filter_results = set()
        filtered_results = []
        for r in presults:
            key = (r.title, tuple(r.authors))
            if key not in filter_results:
                filtered_results.append(r)
                filter_results.add(key)
        results[plugin] = presults = filtered_results

        plog = logs[plugin].getvalue().strip()
        log('\n'+'*'*30, plugin.name, '%s' % (plugin.version,), '*'*30)
        log('Found %d results' % len(presults))
        time_spent = getattr(plugin, 'dl_time_spent', None)
        if time_spent is None:
            log('Downloading was aborted')
            longest, lp = -1, plugin.name
        else:
            log('Downloading from', plugin.name, 'took', time_spent)
            if time_spent > longest:
                longest, lp = time_spent, plugin.name
        for r in presults:
            log('\n\n---')
            try:
                log(unicode(r))
            except TypeError:
                log(repr(r))
        if plog:
            log(plog)
        log('\n'+'*'*80)

        dummy = Metadata(_('Unknown'))
        for i, result in enumerate(presults):
            for f in plugin.prefs['ignore_fields']:
                if ':' not in f:
                    setattr(result, f, getattr(dummy, f))
                if f == 'series':
                    result.series_index = dummy.series_index
            result.relevance_in_source = i
            result.has_cached_cover_url = (
                plugin.cached_cover_url_is_reliable and
                plugin.get_cached_cover_url(result.identifiers) is not None)
            result.identify_plugin = plugin
            if msprefs['txt_comments']:
                if plugin.has_html_comments and result.comments:
                    result.comments = html2text(result.comments)

    log('The identify phase took %.2f seconds' % (time.time() - start_time))
    log('The longest time (%f) was taken by:' % longest, lp)
    log('Merging results from different sources and finding earliest ',
        'publication dates from the worldcat.org service')
    start_time = time.time()
    results = merge_identify_results(results, log)

    log('We have %d merged results, merging took: %.2f seconds' % (
        len(results), time.time() - start_time))

    tm_rules = msprefs['tag_map_rules']
    if tm_rules:
        from calibre.ebooks.metadata.tag_mapper import map_tags

    max_tags = msprefs['max_tags']
    for r in results:
        if tm_rules:
            r.tags = map_tags(r.tags, tm_rules)
        r.tags = r.tags[:max_tags]
        if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
            r.pubdate = None

    if msprefs['swap_author_names']:
        for r in results:
            def swap_to_ln_fn(a):
                if ',' in a:
                    return a
                parts = a.split(None)
                if len(parts) <= 1:
                    return a
                surname = parts[-1]
                return '%s, %s' % (surname, ' '.join(parts[:-1]))

            r.authors = [swap_to_ln_fn(a) for a in r.authors]

    return results
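# A hedged sketch of driving identify(); only its signature and the fact that
# it returns a list of merged Metadata results come from the code above. The
# title, author and ISBN are placeholders.
from threading import Event

def lookup(log):
    abort = Event()
    matches = identify(log, abort, title='Some Title', authors=['Some Author'],
                       identifiers={'isbn': '9780000000000'}, timeout=30)
    for mi in matches:
        log('Candidate:', mi.title, 'by', ' & '.join(mi.authors or []))
    return matches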