def __find_best_series(book, config):
    '''
    Queries the database to find a best guess for a series matching the given
    ComicBook, based on its name, year, issue number, and other text
    attributes.

    'book' -> the ComicBook to find a series for; its 'series_s' and
              'issue_num_s' attributes are read here
    'config' -> configuration object supplying the ignored search terms,
                ignored publishers, before/after year limits, and the
                never-ignore threshold used to filter candidate series

    Returns a SeriesRef if a reasonable guess was found, or None if one wasn't.
    '''

    # 1. obtain SeriesRefs for this book, removing some as dictated by prefs
    series_refs = db.query_series_refs(
        book.series_s, config.ignored_searchterms_sl)
    series_refs = dbutils.filter_series_refs(
        series_refs,
        config.ignored_publishers_sl,
        config.ignored_before_year_n,
        config.ignored_after_year_n,
        config.never_ignore_threshold_n)

    # 2. obtain the first, second, and third best matching SeriesRefs for the
    #    given book, if there are any.
    primary = None
    secondary = None
    tertiary = None
    if len(series_refs) > 0:
        mscore = MatchScore()

        def find_best_score(refs):
            ''' Returns the highest scoring ref in 'refs', or None if empty. '''
            if not refs:
                return None
            # max() scores every ref exactly once and, on a tie, keeps the
            # first ref encountered
            return max(refs, key=lambda ref: mscore.compute_n(book, ref))

        # each winner is removed from the candidates before the next search,
        # so the three results are always distinct entries
        primary = find_best_score(series_refs)
        if primary:
            series_refs.remove(primary)
            secondary = find_best_score(series_refs)
            if secondary:
                series_refs.remove(secondary)
                tertiary = find_best_score(series_refs)

    # 3. if our book is the first (or unknown) issue, figure out if the best
    #    matching series has a similar cover to the second or third best.
    #    if it does, we're probably dealing with a trade paperback and a
    #    regular issue, and we can't find the best series reliably, so we bail
    issue_num_s = book.issue_num_s
    is_first_issue = not issue_num_s or (
        utils.is_number(issue_num_s) and float(issue_num_s) == 1.0)

    if is_first_issue and primary and secondary:
        # slightly more lenient than the regular match threshold
        similarity_threshold = __MATCH_THRESHOLD - 0.10

        hash1 = __get_remote_hash(primary)
        hash2 = __get_remote_hash(secondary)
        too_similar = \
            imagehash.similarity(hash1, hash2) > similarity_threshold

        # only obtain the third hash when the second did not already
        # settle the question
        if not too_similar and tertiary:
            hash3 = __get_remote_hash(tertiary)
            too_similar = \
                imagehash.similarity(hash1, hash3) > similarity_threshold

        if too_similar:
            primary = None

    return primary
def __init__(self, scraper, book, series_refs, search_terms_s):
    '''
    Initializes this form.

    'scraper' -> the currently running ScrapeEngine
    'book' -> the ComicBook being scraped
    'series_refs' -> set or list containing the SeriesRefs to display
    'search_terms_s' -> the user's search string that found the series models

    Raises an Exception if 'series_refs' is empty.
    '''

    # the shared global configuration, taken from the scraper
    self.__config = scraper.config

    # private copy of the SeriesRefs backing this form; there is one ref per
    # table row, and each one represents a series that the user can pick
    self.__series_refs = list(series_refs)

    # computes the match score of each series
    self.__matchscore = MatchScore()

    # True while the user is holding down the control key
    self.__pressing_controlkey = False

    # the dialog's 'ok', 'skip', and 'show issues' buttons
    self.__ok_button = self.__skip_button = self.__issues_button = None

    # the table that displays the series (one per row) to pick from
    self.__table = None

    # IssueCoverPanel showing cover art for the currently selected SeriesRef
    self.__coverpanel = None

    # index (into self.__series_refs) of the currently selected SeriesRef
    self.__chosen_index = None

    # this form is meaningless without at least one series to choose from
    if len(series_refs) < 1:
        raise Exception("do not invoke the SeriesForm with no series!")

    CVForm.__init__(self, scraper.comicrack.MainWindow, "seriesformLocation")
    self.__build_gui(book, search_terms_s)

    # register Close with the scraper's cancel listeners
    scraper.cancel_listeners.append(self.Close)