def fetchIssueData(self, series_id, issue_number, settings):

    volume_results = self.fetchVolumeData(series_id)
    issues_list_results = self.fetchIssuesByVolume(series_id)

    found = False
    for record in issues_list_results:
        if IssueString(issue_number).asString() is None:
            issue_number = 1
        if IssueString(record['issue_number']).asString().lower() == IssueString(
                issue_number).asString().lower():
            found = True
            break

    if found:
        issue_url = (self.api_base_url + "/issue/" + CVTypeID.Issue + "-" +
                     str(record['id']) + "/?api_key=" + self.api_key + "&format=json")
        cv_response = self.getCVContent(issue_url)
        issue_results = cv_response['results']
    else:
        return None

    # Now, map the Comic Vine data to generic metadata
    return self.mapCVDataToMetadata(volume_results, issue_results, settings)
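# Usage sketch (not part of the original source): fetchIssueData() is normally reached
# through a ComicVineTalker instance once a volume has been chosen. The series_id value,
# issue number, and settings object below are placeholders, not real data.
#
#   talker = ComicVineTalker()
#   md = talker.fetchIssueData(series_id, "1", settings)
#   if md is None:
#       # the volume exists but has no issue with that number
#       pass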
def search(self):
    ca = self.comic_archive
    self.match_list = []
    self.cancel = False
    self.search_result = self.ResultNoMatches

    if not pil_available:
        self.log_msg(
            "Python Imaging Library (PIL) is not available and is needed for issue identification.")
        return self.match_list

    if not ca.seemsToBeAComicArchive():
        self.log_msg("Sorry, but " + ca.path + " is not a comic archive!")
        return self.match_list

    cover_image_data = ca.getPage(self.cover_page_index)
    cover_hash = self.calculateHash(cover_image_data)

    # check the aspect ratio
    # if it's wider than it is high, it's probably a two page spread
    # if so, crop it and calculate a second hash
    narrow_cover_hash = None
    aspect_ratio = self.getAspectRatio(cover_image_data)
    if aspect_ratio < 1.0:
        right_side_image_data = self.cropCover(cover_image_data)
        if right_side_image_data is not None:
            narrow_cover_hash = self.calculateHash(right_side_image_data)

    #self.log_msg("Cover hash = {0:016x}".format(cover_hash))

    keys = self.getSearchKeys()
    # normalize the issue number
    keys['issue_number'] = IssueString(keys['issue_number']).asString()

    # we need, at minimum, a series and issue number
    if keys['series'] is None or keys['issue_number'] is None:
        self.log_msg("Not enough info for a search!")
        return []

    self.log_msg("Going to search for:")
    self.log_msg("\tSeries: " + keys['series'])
    self.log_msg("\tIssue: " + keys['issue_number'])
    if keys['issue_count'] is not None:
        self.log_msg("\tCount: " + str(keys['issue_count']))
    if keys['year'] is not None:
        self.log_msg("\tYear: " + str(keys['year']))
    if keys['month'] is not None:
        self.log_msg("\tMonth: " + str(keys['month']))

    #self.log_msg("Publisher Blacklist: " + str(self.publisher_blacklist))

    comicVine = ComicVineTalker()
    comicVine.wait_for_rate_limit = self.waitAndRetryOnRateLimit
    comicVine.setLogFunc(self.output_function)

    self.log_msg(u"Searching for {0} #{1} ...".format(
        keys['series'], keys['issue_number']))
    try:
        cv_search_results = comicVine.searchForSeries(keys['series'])
    except ComicVineTalkerException:
        self.log_msg("Network issue while searching for series. Aborting...")
        return []

    #self.log_msg("Found " + str(len(cv_search_results)) + " initial results")
    if self.cancel:
        return []

    if cv_search_results is None:
        return []

    series_second_round_list = []

    #self.log_msg("Removing results with too long names, banned publishers, or future start dates")
    for item in cv_search_results:
        length_approved = False
        publisher_approved = True
        date_approved = True

        # remove any series that starts after the issue year
        if (keys['year'] is not None and str(keys['year']).isdigit() and
                item['start_year'] is not None and str(item['start_year']).isdigit()):
            if int(keys['year']) < int(item['start_year']):
                date_approved = False

        # assume that our search name is close to the actual name,
        # say within, e.g., 5 chars
        shortened_key = utils.removearticles(keys['series'])
        shortened_item_name = utils.removearticles(item['name'])
        if len(shortened_item_name) < (len(shortened_key) + self.length_delta_thresh):
            length_approved = True

        # remove any series from publishers on the blacklist
        if item['publisher'] is not None:
            publisher = item['publisher']['name']
            if publisher is not None and publisher.lower() in self.publisher_blacklist:
                publisher_approved = False

        if length_approved and publisher_approved and date_approved:
            series_second_round_list.append(item)

    self.log_msg("Searching in " + str(len(series_second_round_list)) + " series")

    if self.callback is not None:
        self.callback(0, len(series_second_round_list))

    # now sort the list by name length
    series_second_round_list.sort(key=lambda x: len(x['name']), reverse=False)

    # build a list of volume IDs
    volume_id_list = list()
    for series in series_second_round_list:
        volume_id_list.append(series['id'])

    try:
        issue_list = comicVine.fetchIssuesByVolumeIssueNumAndYear(
            volume_id_list, keys['issue_number'], keys['year'])
    except ComicVineTalkerException:
        self.log_msg("Network issue while searching for series details. Aborting...")
        return []

    if issue_list is None:
        return []

    shortlist = list()
    # now re-associate the issues and volumes
    for issue in issue_list:
        for series in series_second_round_list:
            if series['id'] == issue['volume']['id']:
                shortlist.append((series, issue))
                break

    if keys['year'] is None:
        self.log_msg(u"Found {0} series that have an issue #{1}".format(
            len(shortlist), keys['issue_number']))
    else:
        self.log_msg(u"Found {0} series that have an issue #{1} from {2}".format(
            len(shortlist), keys['issue_number'], keys['year']))

    # now we have a shortlist of volumes with the desired issue number
    # Do first round of cover matching
    counter = len(shortlist)
    for series, issue in shortlist:
        if self.callback is not None:
            self.callback(counter, len(shortlist) * 3)
            counter += 1
        self.log_msg(u"Examining covers for ID: {0} {1} ({2}) ...".format(
            series['id'], series['name'], series['start_year']), newline=False)

        # parse out the cover date
        day, month, year = comicVine.parseDateStr(issue['cover_date'])

        # Now check the cover match against the primary image
        hash_list = [cover_hash]
        if narrow_cover_hash is not None:
            hash_list.append(narrow_cover_hash)

        try:
            image_url = issue['image']['super_url']
            thumb_url = issue['image']['thumb_url']
            page_url = issue['site_detail_url']

            score_item = self.getIssueCoverMatchScore(
                comicVine, issue['id'], image_url, thumb_url, page_url,
                hash_list, useRemoteAlternates=False)
        except:
            self.match_list = []
            return self.match_list

        match = dict()
        match['series'] = u"{0} ({1})".format(series['name'], series['start_year'])
        match['distance'] = score_item['score']
        match['issue_number'] = keys['issue_number']
        match['cv_issue_count'] = series['count_of_issues']
        match['url_image_hash'] = score_item['hash']
        match['issue_title'] = issue['name']
        match['issue_id'] = issue['id']
        match['volume_id'] = series['id']
        match['month'] = month
        match['year'] = year
        match['publisher'] = None
        if series['publisher'] is not None:
            match['publisher'] = series['publisher']['name']
        match['image_url'] = image_url
        match['thumb_url'] = thumb_url
        match['page_url'] = page_url
        match['description'] = issue['description']

        self.match_list.append(match)

        self.log_msg(" --> {0}".format(match['distance']), newline=False)
        self.log_msg("")

    if len(self.match_list) == 0:
        self.log_msg(":-( no matches!")
        self.search_result = self.ResultNoMatches
        return self.match_list

    # sort list by image match scores
    self.match_list.sort(key=lambda k: k['distance'])

    scores = []
    for i in self.match_list:
        scores.append(i['distance'])

    self.log_msg("Compared to covers in {0} issue(s):".format(
        len(self.match_list)), newline=False)
    self.log_msg(str(scores))

    def print_match(item):
        self.log_msg(u"-----> {0} #{1} {2} ({3}/{4}) -- score: {5}".format(
            item['series'], item['issue_number'], item['issue_title'],
            item['month'], item['year'], item['distance']))

    best_score = self.match_list[0]['distance']

    if best_score >= self.min_score_thresh:
        # we have 1 or more low-confidence matches (all bad cover scores)
        # look at a few more pages in the archive, and also alternate
        # covers online
        self.log_msg(
            "Very weak scores for the cover. Analyzing alternate pages and covers...")
        hash_list = [cover_hash]
        if narrow_cover_hash is not None:
            hash_list.append(narrow_cover_hash)
        for i in range(1, min(3, ca.getNumberOfPages())):
            image_data = ca.getPage(i)
            page_hash = self.calculateHash(image_data)
            hash_list.append(page_hash)

        second_match_list = []
        counter = 2 * len(self.match_list)
        for m in self.match_list:
            if self.callback is not None:
                self.callback(counter, len(self.match_list) * 3)
                counter += 1
            self.log_msg(u"Examining alternate covers for ID: {0} {1} ...".format(
                m['volume_id'], m['series']), newline=False)
            try:
                score_item = self.getIssueCoverMatchScore(
                    comicVine, m['issue_id'], m['image_url'], m['thumb_url'],
                    m['page_url'], hash_list, useRemoteAlternates=True)
            except:
                self.match_list = []
                return self.match_list
            self.log_msg("--->{0}".format(score_item['score']))
            self.log_msg("")

            if score_item['score'] < self.min_alternate_score_thresh:
                second_match_list.append(m)
                m['distance'] = score_item['score']

        if len(second_match_list) == 0:
            if len(self.match_list) == 1:
                self.log_msg("No matching pages in the issue.")
                self.log_msg(
                    u"--------------------------------------------------------------------------")
                print_match(self.match_list[0])
                self.log_msg(
                    u"--------------------------------------------------------------------------")
                self.search_result = self.ResultFoundMatchButBadCoverScore
            else:
                self.log_msg(
                    u"--------------------------------------------------------------------------")
                self.log_msg(u"Multiple bad cover matches! Need to use other info...")
                self.log_msg(
                    u"--------------------------------------------------------------------------")
                self.search_result = self.ResultMultipleMatchesWithBadImageScores
            return self.match_list
        else:
            # We did good, found something!
            self.log_msg("Success in secondary/alternate cover matching!")

            self.match_list = second_match_list
            # sort new list by image match scores
            self.match_list.sort(key=lambda k: k['distance'])
            best_score = self.match_list[0]['distance']
            self.log_msg(
                "[Second round cover matching: best score = {0}]".format(best_score))
            # now drop down into the rest of the processing

    if self.callback is not None:
        self.callback(99, 100)

    # now pare down list, remove any item more than specified distant from
    # the top scores
    for item in reversed(self.match_list):
        if item['distance'] > best_score + self.min_score_distance:
            self.match_list.remove(item)

    # One more test for the case of choosing a limited series first issue vs
    # a trade with the same cover: if we have a given issue count > 1 and the
    # volume from CV has count == 1, remove it from the match list
    if (len(self.match_list) >= 2 and keys['issue_count'] is not None and
            keys['issue_count'] != 1):
        new_list = list()
        for match in self.match_list:
            if match['cv_issue_count'] != 1:
                new_list.append(match)
            else:
                self.log_msg(
                    "Removing volume {0} [{1}] from consideration (only 1 issue)".format(
                        match['series'], match['volume_id']))

        if len(new_list) > 0:
            self.match_list = new_list

    if len(self.match_list) == 1:
        self.log_msg(
            u"--------------------------------------------------------------------------")
        print_match(self.match_list[0])
        self.log_msg(
            u"--------------------------------------------------------------------------")
        self.search_result = self.ResultOneGoodMatch

    elif len(self.match_list) == 0:
        self.log_msg(
            u"--------------------------------------------------------------------------")
        self.log_msg("No matches found :(")
        self.log_msg(
            u"--------------------------------------------------------------------------")
        self.search_result = self.ResultNoMatches

    else:
        # we've got multiple good matches:
        self.log_msg("More than one likely candidate.")
        self.search_result = self.ResultMultipleGoodMatches
        self.log_msg(
            u"--------------------------------------------------------------------------")
        for item in self.match_list:
            print_match(item)
        self.log_msg(
            u"--------------------------------------------------------------------------")

    return self.match_list
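# Usage sketch (not part of the original source): one plausible way search() is driven,
# assuming the enclosing IssueIdentifier class is constructed with a ComicArchive and a
# settings object as it is elsewhere in ComicTagger; the constructor arguments shown
# here are illustrative, not guaranteed API.
#
#   ii = IssueIdentifier(comic_archive, settings)
#   matches = ii.search()
#   if ii.search_result == ii.ResultOneGoodMatch:
#       best = matches[0]
#       # each match dict carries 'series', 'issue_number', 'issue_id', 'distance', etc.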
def mapCVDataToMetadata(self, volume_results, issue_results, settings):

    # Now, map the Comic Vine data to generic metadata
    metadata = GenericMetadata()

    metadata.series = issue_results['volume']['name']

    num_s = IssueString(issue_results['issue_number']).asString()
    metadata.issue = num_s
    metadata.title = issue_results['name']

    metadata.publisher = volume_results['publisher']['name']
    metadata.day, metadata.month, metadata.year = self.parseDateStr(
        issue_results['cover_date'])

    #metadata.issueCount = volume_results['count_of_issues']
    metadata.comments = self.cleanup_html(
        issue_results['description'], settings.remove_html_tables)
    if settings.use_series_start_as_volume:
        metadata.volume = volume_results['start_year']

    metadata.notes = "Tagged with the {0} fork of ComicTagger {1} using info from Comic Vine on {2}. [Issue ID {3}]".format(
        ctversion.fork,
        ctversion.version,
        datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        issue_results['id'])
    #metadata.notes += issue_results['site_detail_url']
    metadata.webLink = issue_results['site_detail_url']

    person_credits = issue_results['person_credits']
    for person in person_credits:
        if 'role' in person:
            roles = person['role'].split(',')
            for role in roles:
                # can we determine 'primary' from CV??
                metadata.addCredit(person['name'], role.title().strip(), False)

    character_credits = issue_results['character_credits']
    character_list = list()
    for character in character_credits:
        character_list.append(character['name'])
    metadata.characters = utils.listToString(character_list)

    team_credits = issue_results['team_credits']
    team_list = list()
    for team in team_credits:
        team_list.append(team['name'])
    metadata.teams = utils.listToString(team_list)

    location_credits = issue_results['location_credits']
    location_list = list()
    for location in location_credits:
        location_list.append(location['name'])
    metadata.locations = utils.listToString(location_list)

    story_arc_credits = issue_results['story_arc_credits']
    arc_list = []
    for arc in story_arc_credits:
        arc_list.append(arc['name'])
    if len(arc_list) > 0:
        metadata.storyArc = utils.listToString(arc_list)

    return metadata
def determineName(self, filename, ext=None):

    md = self.metdata
    new_name = self.template

    preferred_encoding = utils.get_actual_preferred_encoding()

    #print u"{0}".format(md)
    new_name = self.replaceToken(new_name, md.series, '%series%')
    new_name = self.replaceToken(new_name, md.volume, '%volume%')

    if md.issue is not None:
        issue_str = u"{0}".format(
            IssueString(md.issue).asString(pad=self.issue_zero_padding))
    else:
        issue_str = None
    new_name = self.replaceToken(new_name, issue_str, '%issue%')

    new_name = self.replaceToken(new_name, md.issueCount, '%issuecount%')
    new_name = self.replaceToken(new_name, md.year, '%year%')
    new_name = self.replaceToken(new_name, md.publisher, '%publisher%')
    new_name = self.replaceToken(new_name, md.title, '%title%')
    new_name = self.replaceToken(new_name, md.month, '%month%')

    month_name = None
    if md.month is not None:
        if (type(md.month) == str and md.month.isdigit()) or type(md.month) == int:
            if int(md.month) in range(1, 13):
                dt = datetime.datetime(1970, int(md.month), 1, 0, 0)
                month_name = dt.strftime(u"%B".encode(
                    preferred_encoding)).decode(preferred_encoding)
    new_name = self.replaceToken(new_name, month_name, '%month_name%')

    new_name = self.replaceToken(new_name, md.genre, '%genre%')
    new_name = self.replaceToken(new_name, md.language, '%language_code%')
    new_name = self.replaceToken(new_name, md.criticalRating, '%criticalrating%')
    new_name = self.replaceToken(new_name, md.alternateSeries, '%alternateseries%')
    new_name = self.replaceToken(new_name, md.alternateNumber, '%alternatenumber%')
    new_name = self.replaceToken(new_name, md.alternateCount, '%alternatecount%')
    new_name = self.replaceToken(new_name, md.imprint, '%imprint%')
    new_name = self.replaceToken(new_name, md.format, '%format%')
    new_name = self.replaceToken(new_name, md.maturityRating, '%maturityrating%')
    new_name = self.replaceToken(new_name, md.storyArc, '%storyarc%')
    new_name = self.replaceToken(new_name, md.seriesGroup, '%seriesgroup%')
    new_name = self.replaceToken(new_name, md.scanInfo, '%scaninfo%')

    if self.smart_cleanup:
        # remove empty braces, brackets, parentheses
        new_name = re.sub(r"\(\s*[-:]*\s*\)", "", new_name)
        new_name = re.sub(r"\[\s*[-:]*\s*\]", "", new_name)
        new_name = re.sub(r"\{\s*[-:]*\s*\}", "", new_name)

        # remove duplicate spaces
        new_name = u" ".join(new_name.split())

        # remove duplicate -, _
        new_name = re.sub(r"[-_]{2,}\s+", "-- ", new_name)
        new_name = re.sub(r"(\s--)+", " --", new_name)
        new_name = re.sub(r"(\s-)+", " -", new_name)

        # remove dash or double dash at end of line
        new_name = re.sub(r"[-]{1,2}\s*$", "", new_name)

        # remove duplicate spaces (again!)
        new_name = u" ".join(new_name.split())

    if ext is None:
        ext = os.path.splitext(filename)[1]

    new_name += ext

    # some tweaks to keep various filesystems happy
    new_name = new_name.replace("/", "-")
    new_name = new_name.replace(" :", " -")
    new_name = new_name.replace(": ", " - ")
    new_name = new_name.replace(":", "-")
    new_name = new_name.replace("?", "")

    return new_name
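# Usage sketch (not part of the original source): determineName() is meant to be called
# on a renamer object after its metadata and template have been set. The constructor
# argument and the direct template assignment below are assumptions about the enclosing
# FileRenamer class, not verified API.
#
#   renamer = FileRenamer(md)
#   renamer.template = "%series% #%issue% (%year%)"
#   new_name = renamer.determineName("Old Filename.cbz")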
def performQuery(self):

    QtGui.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.WaitCursor))

    try:
        comicVine = ComicVineTalker()
        volume_data = comicVine.fetchVolumeData(self.series_id)
        self.issue_list = comicVine.fetchIssuesByVolume(self.series_id)
    except ComicVineTalkerException as e:
        QtGui.QApplication.restoreOverrideCursor()
        if e.code == ComicVineTalkerException.RateLimit:
            QtGui.QMessageBox.critical(
                self,
                self.tr("Comic Vine Error"),
                ComicVineTalker.getRateLimitMessage())
        else:
            QtGui.QMessageBox.critical(
                self,
                self.tr("Network Issue"),
                self.tr("Could not connect to ComicVine to list issues!"))
        return

    while self.twList.rowCount() > 0:
        self.twList.removeRow(0)

    self.twList.setSortingEnabled(False)

    row = 0
    for record in self.issue_list:
        self.twList.insertRow(row)

        item_text = record['issue_number']

        item = IssueNumberTableWidgetItem(item_text)
        item.setData(QtCore.Qt.ToolTipRole, item_text)
        item.setData(QtCore.Qt.UserRole, record['id'])
        item.setData(QtCore.Qt.DisplayRole, item_text)
        item.setFlags(QtCore.Qt.ItemIsSelectable | QtCore.Qt.ItemIsEnabled)
        self.twList.setItem(row, 0, item)

        item_text = record['cover_date']
        if item_text is None:
            item_text = ""
        # remove the day of "YYYY-MM-DD"
        parts = item_text.split("-")
        if len(parts) > 1:
            item_text = parts[0] + "-" + parts[1]

        item = QtGui.QTableWidgetItem(item_text)
        item.setData(QtCore.Qt.ToolTipRole, item_text)
        item.setFlags(QtCore.Qt.ItemIsSelectable | QtCore.Qt.ItemIsEnabled)
        self.twList.setItem(row, 1, item)

        item_text = record['name']
        if item_text is None:
            item_text = ""
        item = QtGui.QTableWidgetItem(item_text)
        item.setData(QtCore.Qt.ToolTipRole, item_text)
        item.setFlags(QtCore.Qt.ItemIsSelectable | QtCore.Qt.ItemIsEnabled)
        self.twList.setItem(row, 2, item)

        if IssueString(record['issue_number']).asString().lower() == IssueString(
                self.issue_number).asString().lower():
            self.initial_id = record['id']

        row += 1

    self.twList.setSortingEnabled(True)
    self.twList.sortItems(0, QtCore.Qt.AscendingOrder)

    QtGui.QApplication.restoreOverrideCursor()
def __lt__(self, other):
    selfStr = self.data(QtCore.Qt.DisplayRole).toString()
    otherStr = other.data(QtCore.Qt.DisplayRole).toString()
    return (IssueString(selfStr).asFloat() <
            IssueString(otherStr).asFloat())
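# Illustrative note (not part of the original source): the comparison delegates to
# IssueString.asFloat() so issue numbers sort numerically rather than lexically,
# e.g. "2" sorts before "10" and "3.1" lands between "3" and "4":
#
#   sorted(["10", "2", "3.1", "3"], key=lambda s: IssueString(s).asFloat())
#   # -> ["2", "3", "3.1", "10"]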