def __find_best_series(book, config):
    '''
    Queries the database to find a best guess for a series matching the given
    ComicBook, based on its name, year, issue number, and other text
    attributes.

    book: the ComicBook to match; its series_s and issue_num_s attributes are
       read here.
    config: the preferences object supplying the ignored_searchterms_sl,
       ignored_publishers_sl, ignored_before_year_n, ignored_after_year_n and
       never_ignore_threshold_n settings used to narrow down the candidates.

    Returns SeriesRef if a reasonable guess was found, or None if one wasn't.
    '''

    # 1. obtain SeriesRefs for this book, removing some as dictated by prefs
    series_refs = db.query_series_refs(
        book.series_s, config.ignored_searchterms_sl)
    series_refs = dbutils.filter_series_refs(
        series_refs,
        config.ignored_publishers_sl,
        config.ignored_before_year_n,
        config.ignored_after_year_n,
        config.never_ignore_threshold_n)
    if len(series_refs) == 0:
        return None

    # 2. obtain the first, second, and third best matching SeriesRefs for the
    #    given book, if there are any.
    mscore = MatchScore()

    def find_best_score(refs):
        # each candidate is scored exactly once per pass; when several
        # candidates share the top score, max() returns the first one
        # encountered
        if not refs:
            return None
        return max(refs, key=lambda ref: mscore.compute_n(book, ref))

    primary = find_best_score(series_refs)
    secondary = None
    tertiary = None
    if primary:
        # each winner is removed so the next pass picks the runner-up
        series_refs.remove(primary)
        secondary = find_best_score(series_refs)
        if secondary:
            series_refs.remove(secondary)
            tertiary = find_best_score(series_refs)

    # 3. if our book is the first (or unknown) issue, figure out if the best
    #    matching series has a similar cover to the second or third best.
    #    if it does, we're probably dealing with a trade paperback and a
    #    regular issue, and we can't find the best series reliably, so we bail
    issue_num = book.issue_num_s
    is_first_issue = not issue_num or (
        utils.is_number(issue_num) and float(issue_num) == 1.0)
    if not (is_first_issue and primary and secondary):
        return primary

    # covers count as "too similar" at 0.10 below the regular match threshold
    similarity_threshold = __MATCH_THRESHOLD - 0.10
    primary_hash = __get_remote_hash(primary)
    too_similar = imagehash.similarity(
        primary_hash, __get_remote_hash(secondary)) > similarity_threshold
    if not too_similar and tertiary:
        # the third best is only consulted when the second best was not
        # already too close to the best
        too_similar = imagehash.similarity(
            primary_hash, __get_remote_hash(tertiary)) > similarity_threshold
    return None if too_similar else primary
def __find_best_series(book, config):
    '''
    Queries the database to find a best guess for a series matching the given
    ComicBook, based on its name, year, issue number, and other text
    attributes.

    book: the ComicBook to match; its series_s and issue_num_s attributes are
       read here.
    config: the preferences object supplying the ignored_searchterms_sl,
       ignored_publishers_sl, ignored_before_year_n, ignored_after_year_n and
       never_ignore_threshold_n settings used to narrow down the candidates.

    Returns SeriesRef if a reasonable guess was found, or None if one wasn't.
    '''

    # 1. obtain SeriesRefs for this book, removing some as dictated by prefs
    series_refs = db.query_series_refs(
        book.series_s, config.ignored_searchterms_sl)
    series_refs = dbutils.filter_series_refs(
        series_refs,
        config.ignored_publishers_sl,
        config.ignored_before_year_n,
        config.ignored_after_year_n,
        config.never_ignore_threshold_n)
    if len(series_refs) == 0:
        return None

    # 2. obtain the first, second, and third best matching SeriesRefs for the
    #    given book, if there are any.
    mscore = MatchScore()

    def find_best_score(refs):
        # each candidate is scored exactly once per pass; when several
        # candidates share the top score, max() returns the first one
        # encountered
        if not refs:
            return None
        return max(refs, key=lambda ref: mscore.compute_n(book, ref))

    primary = find_best_score(series_refs)
    secondary = None
    tertiary = None
    if primary:
        # each winner is removed so the next pass picks the runner-up
        series_refs.remove(primary)
        secondary = find_best_score(series_refs)
        if secondary:
            series_refs.remove(secondary)
            tertiary = find_best_score(series_refs)

    # 3. if our book is the first (or unknown) issue, figure out if the best
    #    matching series has a similar cover to the second or third best.
    #    if it does, we're probably dealing with a trade paperback and a
    #    regular issue, and we can't find the best series reliably, so we bail
    issue_num = book.issue_num_s
    is_first_issue = not issue_num or (
        utils.is_number(issue_num) and float(issue_num) == 1.0)
    if not (is_first_issue and primary and secondary):
        return primary

    # covers count as "too similar" at 0.10 below the regular match threshold
    similarity_threshold = __MATCH_THRESHOLD - 0.10
    primary_hash = __get_remote_hash(primary)
    too_similar = imagehash.similarity(
        primary_hash, __get_remote_hash(secondary)) > similarity_threshold
    if not too_similar and tertiary:
        # the third best is only consulted when the second best was not
        # already too close to the best
        too_similar = imagehash.similarity(
            primary_hash, __get_remote_hash(tertiary)) > similarity_threshold
    return None if too_similar else primary
def are_the_same(hash1, hash2):
    '''
    Reports whether two image hashes are close enough to be treated as the
    same image.

    hash1, hash2: the two image hashes handed to imagehash.similarity.

    Returns the result of checking that their similarity is strictly greater
    than __MATCH_THRESHOLD.
    '''
    return imagehash.similarity(hash1, hash2) > __MATCH_THRESHOLD