def _create_key_tag_s(issue_key):
    '''
    ComicVine implementation of the identically named method in the db.py

    Returns "CVDB" followed by the integer form of 'issue_key'.  If that tag
    cannot be built, the failure is logged and None is returned instead.
    '''
    try:
        key_n = int(issue_key)
        tag_s = "CVDB" + utils.sstr(key_n)
    except:
        log.debug_exc("Couldn't create key tag out of: " + sstr(issue_key))
        tag_s = None
    return tag_s
def __volume_to_seriesref(volume):
    '''
    Builds a SeriesRef out of the given cvdb "volume" dom element.
    '''
    # a publisher element whose __dict__ holds at most one entry contributes
    # an empty publisher string; otherwise its 'name' is used
    if len(volume.publisher.__dict__) <= 1:
        publisher = ''
    else:
        publisher = volume.publisher.name

    series_key = int(volume.id)
    name_s = sstr(volume.name)
    start_year_s = sstr(volume.start_year).rstrip("- ")  # see bug 334
    publisher_s = sstr(publisher)
    issue_count_s = sstr(volume.count_of_issues)
    image_url = __parse_image_url(volume)
    return SeriesRef(series_key, name_s, start_year_s, publisher_s,
                     issue_count_s, image_url)
def _query_issue_id_dom(API_KEY, seriesid_s, issue_num_s):
    """
    Queries ComicVine for a dom containing the issue ID of the given issue
    number within the given series id.

    The issue number is stripped of surrounding whitespace and of leading
    zeros before it is used (a number made only of zeros becomes "0").

    Raises a ValueError if the series id or the cleaned up issue number is
    empty; the query itself may throw other Exceptions.
    """
    # {0} is the series ID, an integer, and {1} is issue number, a string
    query_template_s = "http://comicvine.com/api/issues/?api_key=" + API_KEY \
        + __CLIENTID \
        + "&format=xml&field_list=name,issue_number,id,image" \
        + "&filter=volume:{0},issue_number:{1}"

    # cv does not play well with leading zeros in issue nums. see issue #403.
    number_s = sstr(issue_num_s).strip()
    if number_s:  # fix issue 411
        number_s = number_s.lstrip("0").strip() or "0"

    if not (seriesid_s and number_s):
        raise ValueError("bad parameters")

    series_s = sstr(seriesid_s)
    encoded_number_s = HttpUtility.UrlPathEncode(sstr(number_s))
    return __get_dom(query_template_s.format(series_s, encoded_number_s))
def __set_crossovers_sl(self, crossovers_sl):
    '''
    called when you assign a value to 'self.crossovers_sl'

    Keeps the sstr() form of every element that is truthy and not blank
    after stripping.  If anything goes wrong while doing so, an empty list
    is stored instead.
    '''
    try:
        kept_sl = []
        for x in crossovers_sl:
            if x and len(sstr(x).strip()) > 0:
                kept_sl.append(sstr(x))
        self.__crossovers_sl = kept_sl
    except:
        self.__crossovers_sl = []
def __set_colorists_sl(self, colorists_sl):
    '''
    called when you assign a value to 'self.colorists_sl'

    Keeps every element that is truthy and not blank after stripping, in
    its sstr() form with all commas and semicolons removed.  If anything
    goes wrong while doing so, an empty list is stored instead.
    '''
    try:
        kept_sl = []
        for x in colorists_sl:
            if x and len(sstr(x).strip()) > 0:
                kept_sl.append(re.sub(r',|;', '', sstr(x)))
        self.__colorists_sl = kept_sl
    except:
        self.__colorists_sl = []
def _create_key_tag_s(issue_key):
    """
    ComicVine implementation of the identically named method in the db.py

    Returns "CVDB" followed by the integer form of 'issue_key'.  If that tag
    cannot be built, the failure is logged and None is returned instead.
    """
    try:
        key_n = int(issue_key)
        tag_s = "CVDB" + utils.sstr(key_n)
    except:
        log.debug_exc("Couldn't create key tag out of: " + sstr(issue_key))
        tag_s = None
    return tag_s
def __set_image_urls_sl(self, image_urls_sl):
    '''
    called when you assign a value to 'self.image_urls_sl'

    Keeps the sstr() form of every element that is truthy and not blank
    after stripping.  If anything goes wrong while doing so, an empty list
    is stored instead.
    '''
    try:
        kept_sl = []
        for x in image_urls_sl:
            if x and len(sstr(x).strip()) > 0:
                kept_sl.append(sstr(x))
        self.__image_urls_sl = kept_sl
    except:
        self.__image_urls_sl = []
def __set_letterers_sl(self, letterers_sl):
    '''
    called when you assign a value to 'self.letterers_sl'

    Keeps every element that is truthy and not blank after stripping, in
    its sstr() form with all commas and semicolons removed.  If anything
    goes wrong while doing so, an empty list is stored instead.
    '''
    try:
        kept_sl = []
        for x in letterers_sl:
            if x and len(sstr(x).strip()) > 0:
                kept_sl.append(re.sub(r',|;', '', sstr(x)))
        self.__letterers_sl = kept_sl
    except:
        self.__letterers_sl = []
def _check_magic_file(path_s):
    '''
    ComicVine implementation of the identically named method in the db.py

    Looks for a "cvinfo.txt" or "cvinfo" file in the given directory (or, if
    'path_s' is not an existing directory, in its parent directory), and
    hands the stripped contents of that file to __url_to_seriesref.

    Returns whatever __url_to_seriesref produced, or None if no file was
    found or something went wrong along the way (failures are logged).
    '''
    series_ref = None
    file_s = None
    try:
        # 1. get the directory to search for a cvinfo file in, or None
        if not path_s:
            dir_s = None
        elif Directory.Exists(path_s):
            dir_s = path_s
        else:
            dir_s = Path.GetDirectoryName(path_s)
        if not (dir_s and Directory.Exists(dir_s)):
            dir_s = None

        if dir_s:
            # 2. search in that directory for a properly named cvinfo file
            #    note that Windows filenames are not case sensitive.
            #    when both files exist, the last name checked wins.
            for name_s in ["cvinfo.txt", "cvinfo"]:
                candidate_s = dir_s + "\\" + name_s
                if File.Exists(candidate_s):
                    file_s = candidate_s

            # 3. if we found a file, read its contents in, and parse the
            #    comicvine series out of it, if possible.
            if file_s:
                with StreamReader(file_s, Encoding.UTF8, False) as sr:
                    line = sr.ReadToEnd()
                    if line:
                        line = line.strip()
                    series_ref = __url_to_seriesref(line)
    except:
        log.debug_exc("bad cvinfo file: " + sstr(file_s))

    if file_s and not series_ref:
        log.debug("ignoring bad cvinfo file: ", sstr(file_s))
    return series_ref  # may be None!
def _query_issue_refs(series_ref, callback_function=lambda x: False):
    '''
    ComicVine implementation of the identically named method in the db.py

    Fetches the issues of the given series page by page and returns them as
    a set of IssueRefs.  When more than one page is needed, the
    'callback_function' is called after each page with the fraction of
    results requested so far; if it returns a true value the query stops
    and an empty set is returned.
    '''
    RESULTS_PAGE_SIZE = 100
    issue_refs = set()

    def add_issues(page_dom):
        # the dom could contain a single issue OR a list of issues in its
        # 'issue' variable; either way, convert them all into IssueRefs
        found = page_dom.results.issue
        if isinstance(found, list):
            for issue in found:
                issue_refs.add(__issue_to_issueref(issue))
        else:
            issue_refs.add(__issue_to_issueref(found))

    # a comicvine series key can be interpreted as an integer
    series_id_s = sstr(int(series_ref.series_key))
    cancelled = False

    # 1. do the initial query, record how many results in total we're getting
    dom = cvconnection._query_issue_ids_dom(__api_key, series_id_s, 1)
    total_n = int(dom.number_of_total_results) if dom else 0

    if total_n > 0:
        # 2. add the results of the initial query to the returned set
        add_issues(dom)

        # 3. if there were more results than fit on one page, we'll have to
        #    do some more queries now to get the rest of them
        requested_n = RESULTS_PAGE_SIZE
        if requested_n < total_n:
            # 3a. do a callback for the first results (initial query)...
            cancelled = callback_function(float(requested_n) / total_n)

        while requested_n < total_n and not cancelled:
            # 4. query for the next batch of results, in a new dom
            page_n = requested_n // RESULTS_PAGE_SIZE + 1
            dom = cvconnection._query_issue_ids_dom(
                __api_key, series_id_s, page_n)
            requested_n += RESULTS_PAGE_SIZE

            # 4a. do a callback for the most recent batch of results
            cancelled = callback_function(float(requested_n) / total_n)

            # 5. add the current batch of results, unless it was empty
            if int(dom.number_of_page_results) < 1:
                log.debug("WARNING: got empty results page")
            else:
                add_issues(dom)

    # 6. Done. issue_refs now contains whatever IssueRefs we could find
    return set() if cancelled else issue_refs
def __set_letterers_sl(self, letterers_sl):
    '''
    called when you assign a value to 'self.letterers_sl'

    Keeps every element that is truthy and not blank after stripping, in
    its sstr() form with all commas and semicolons removed.  If anything
    goes wrong while doing so, an empty list is stored instead.
    '''
    try:
        kept_sl = []
        for x in letterers_sl:
            if x and len(sstr(x).strip()) > 0:
                kept_sl.append(re.sub(r',|;', '', sstr(x)))
        self.__letterers_sl = kept_sl
    except:
        self.__letterers_sl = []
def __set_colorists_sl(self, colorists_sl):
    '''
    called when you assign a value to 'self.colorists_sl'

    Keeps every element that is truthy and not blank after stripping, in
    its sstr() form with all commas and semicolons removed.  If anything
    goes wrong while doing so, an empty list is stored instead.
    '''
    try:
        kept_sl = []
        for x in colorists_sl:
            if x and len(sstr(x).strip()) > 0:
                kept_sl.append(re.sub(r',|;', '', sstr(x)))
        self.__colorists_sl = kept_sl
    except:
        self.__colorists_sl = []
def _query_issue_refs(series_ref, callback_function=lambda x: False):
    """
    ComicVine implementation of the identically named method in the db.py

    Fetches the issues of the given series page by page and returns them as
    a set of IssueRefs.  When more than one page is needed, the
    'callback_function' is called after each page with the fraction of
    results requested so far; if it returns a true value the query stops
    and an empty set is returned.
    """
    RESULTS_PAGE_SIZE = 100
    issue_refs = set()

    def add_issues(page_dom):
        # the dom could contain a single issue OR a list of issues in its
        # 'issue' variable; either way, convert them all into IssueRefs
        found = page_dom.results.issue
        if isinstance(found, list):
            for issue in found:
                issue_refs.add(__issue_to_issueref(issue))
        else:
            issue_refs.add(__issue_to_issueref(found))

    # a comicvine series key can be interpreted as an integer
    series_id_s = sstr(int(series_ref.series_key))
    cancelled = False

    # 1. do the initial query, record how many results in total we're getting
    dom = cvconnection._query_issue_ids_dom(__api_key, series_id_s, 1)
    total_n = int(dom.number_of_total_results) if dom else 0

    if total_n > 0:
        # 2. add the results of the initial query to the returned set
        add_issues(dom)

        # 3. if there were more results than fit on one page, we'll have to
        #    do some more queries now to get the rest of them
        requested_n = RESULTS_PAGE_SIZE
        if requested_n < total_n:
            # 3a. do a callback for the first results (initial query)...
            cancelled = callback_function(float(requested_n) / total_n)

        while requested_n < total_n and not cancelled:
            # 4. query for the next batch of results, in a new dom
            page_n = requested_n // RESULTS_PAGE_SIZE + 1
            dom = cvconnection._query_issue_ids_dom(
                __api_key, series_id_s, page_n)
            requested_n += RESULTS_PAGE_SIZE

            # 4a. do a callback for the most recent batch of results
            cancelled = callback_function(float(requested_n) / total_n)

            # 5. add the current batch of results, unless it was empty
            if int(dom.number_of_page_results) < 1:
                log.debug("WARNING: got empty results page")
            else:
                add_issues(dom)

    # 6. Done. issue_refs now contains whatever IssueRefs we could find
    return set() if cancelled else issue_refs
def delegate():
    # writes this form's current location and/or size into the map stored
    # in Resources.GEOMETRY_FILE, under whichever persist keys are set
    size_key_s = self.__persist_size_key_s
    loc_key_s = self.__persist_loc_key_s
    if not (size_key_s or loc_key_s):
        return  # nothing is being persisted for this form

    prefs = load_map(Resources.GEOMETRY_FILE)
    if loc_key_s:
        location_s = sstr(self.Location.X) + "," + sstr(self.Location.Y)
        prefs[loc_key_s] = location_s
    if size_key_s:
        size_s = sstr(self.Width) + "," + sstr(self.Height)
        prefs[size_key_s] = size_s
    persist_map(prefs, Resources.GEOMETRY_FILE)
def __init__(self, database_name_s, url_s, underlying, error_code_s="0"):
    '''
    database_name_s -> the name of the database that raised this error
    url_s -> the url that caused the problem
    underlying => the underlying io exception object or error string
    error_code => the underlying database error code, or 0 if there isn't one

    The exception message contains the database name, the url (with the
    value of any 'api_key' parameter replaced by '...') and the underlying
    cause with carriage returns removed.
    '''
    db_name_s = sstr(database_name_s)
    # never leak the user's api key into logs or error dialogs
    safe_url_s = re.sub(r"api_key=[^&]*", r"api_key=...", url_s)
    cause_s = sstr(underlying).replace('\r', '')  # .NET exception
    message_s = db_name_s + " database could not be reached\n" \
        "url: " + safe_url_s + "\nCAUSE: " + cause_s

    # super() must be given THIS class, so that the lookup starts right
    # after it in the MRO rather than skipping past Exception.
    super(DatabaseConnectionError, self).__init__(message_s)
    self.__database_name_s = db_name_s
    self.__error_code_s = sstr(error_code_s).strip()
def __init__(self, database_name_s, url_s, underlying, error_code_s="0"):
    '''
    database_name_s -> the name of the database that raised this error
    url_s -> the url that caused the problem
    underlying => the underlying io exception object or error string
    error_code => the underlying database error code, or 0 if there isn't one

    The exception message contains the database name, the url (with the
    value of any 'api_key' parameter replaced by '...') and the underlying
    cause with carriage returns removed.
    '''
    db_name_s = sstr(database_name_s)
    safe_url_s = re.sub(r"api_key=[^&]*", r"api_key=...", url_s)
    cause_s = sstr(underlying).replace('\r', '')  # .NET exception
    message_s = db_name_s + " database could not be reached\n" \
        "url: " + safe_url_s + "\nCAUSE: " + cause_s

    super(DatabaseConnectionError, self).__init__(message_s)
    self.__database_name_s = db_name_s
    self.__error_code_s = sstr(error_code_s).strip()
def __build_label(self, series_ref):
    """
    builds and returns the main text label for this form

    The label text names the given SeriesRef as 'name' (publisher, year),
    leaving out whichever of the publisher and year are not available.
    """
    # 1. compute the best possible full name for the given SeriesRef
    title_s = series_ref.series_name_s
    publisher_s = series_ref.publisher_s
    year_n = series_ref.volume_year_n
    year_s = sstr(year_n) if year_n > 0 else ""

    fullname_s = ""
    if title_s:
        fullname_s = "'" + title_s + "'"
        if publisher_s:
            # the year is only ever shown alongside a publisher
            fullname_s += " (" + publisher_s
            if year_s:
                fullname_s += ", " + year_s
            fullname_s += ")"

    # 2. build the label; long names are separated from the rest of the
    #    text by a space instead of by a line break
    label = Label()
    label.UseMnemonic = False
    if len(fullname_s) > 40:
        sep = " "
    else:
        sep = "\n"
    label.Text = i18n.get("IssueFormChooseText").format(fullname_s, sep)

    # 3. position and size depend on whether cover art is being shown
    if self.__config.show_covers_b:
        label.Location = Point(218, 20)
        label.Size = Size(480, 40)
    else:
        label.Location = Point(10, 20)
        label.Size = Size(680, 40)
    return label
def __issue_scrape_extra_details(issue, page):
    '''
    Parse additional details from the issues ComicVine webpage.

    Appends any alternate cover image urls found in 'page' to
    issue.image_urls_sl, and sets issue.rating_n if a positive community
    rating is found.  Does nothing if 'page' is empty or None.
    '''
    if not page:
        return

    # first pass: find all the alternate cover image urls.  the first
    # matching div is skipped; only the ones after it are examined.
    cover_div_re = re.compile(
        r'(?mis)\<\s*div[^\>]*img imgboxart issue-cover[^\>]+\>(.*?)div\s*>')
    for div_s in cover_div_re.findall(page)[1:]:
        img_match = re.search(r'(?i)\<\s*img\s+.*src\s*=\s*"([^"]*)', div_s)
        if not img_match:
            continue
        image_url_s = img_match.group(1)
        if image_url_s:
            issue.image_urls_sl.append(image_url_s)

    # second pass: find the community rating (stars) for this comic
    rating_re = re.compile(
        r'(?mis)\<span class="average-score"\>(\d+\.?\d*) stars?\</span\>')
    rating_match = rating_re.search(page)
    if rating_match:
        try:
            rating = float(rating_match.group(1))
            if rating > 0:
                issue.rating_n = rating
        except:
            log.debug_exc("Error parsing rating for " + sstr(issue) + ": ")
def show_form(self):
    """
    Displays this form, blocking until the user closes it.

    Returns an IssueFormResult describing how the form was closed ("OK",
    "CANCEL", "SKIP", "PERMSKIP" or "BACK"); for "OK" the result also
    carries the IssueRef that was chosen.  Raises an Exception if the dialog
    closes with any other answer.
    """
    answer = self.ShowDialog(self.Owner)  # blocks

    if answer == DialogResult.OK:
        chosen_ref = self.__issue_refs[self.__chosen_index]
        result = IssueFormResult("OK", chosen_ref)
        alt_choice = self.__coverpanel.get_alt_issue_cover_choice()
        if alt_choice:
            issue_ref, image_ref = alt_choice
            # the user chose a non-default cover image for this issue.
            # we'll store that choice in the global "session data map",
            # in case any other part of the program wants to use it.
            alt_cover_key = sstr(issue_ref.issue_key) + "-altcover"
            self.__config.session_data_map[alt_cover_key] = image_ref
        return result

    if answer == DialogResult.Cancel:
        return IssueFormResult("CANCEL")

    if answer == DialogResult.Ignore:
        # holding down control turns a skip into a permanent skip
        if self.ModifierKeys == Keys.Control:
            return IssueFormResult("PERMSKIP")
        return IssueFormResult("SKIP")

    if answer == DialogResult.Retry:
        return IssueFormResult("BACK")

    raise Exception()
def __build_label(self, series_ref):
    '''
    builds and returns the main text label for this form

    The label text names the given SeriesRef as 'name' (publisher, year),
    leaving out whichever of the publisher and year are not available.
    '''
    # 1. compute the best possible full name for the given SeriesRef
    title_s = series_ref.series_name_s
    publisher_s = series_ref.publisher_s
    year_n = series_ref.volume_year_n
    year_s = sstr(year_n) if year_n > 0 else ''

    fullname_s = ''
    if title_s:
        fullname_s = "'" + title_s + "'"
        if publisher_s:
            # the year is only ever shown alongside a publisher
            fullname_s += " (" + publisher_s
            if year_s:
                fullname_s += ", " + year_s
            fullname_s += ")"

    # 2. build the label; long names are separated from the rest of the
    #    text by a space instead of by a line break
    label = Label()
    label.UseMnemonic = False
    if len(fullname_s) > 40:
        sep = ' '
    else:
        sep = '\n'
    label.Text = i18n.get("IssueFormChooseText").format(fullname_s, sep)

    # 3. position and size depend on whether cover art is being shown
    if self.__config.show_covers_b:
        label.Location = Point(218, 20)
        label.Size = Size(480, 40)
    else:
        label.Location = Point(10, 20)
        label.Size = Size(680, 40)
    return label
def show_form(self):
    '''
    Displays this form, blocking until the user closes it.

    Returns an IssueFormResult describing how the form was closed ("OK",
    "CANCEL", "SKIP", "PERMSKIP" or "BACK"); for "OK" the result also
    carries the IssueRef that was chosen.  Raises an Exception if the dialog
    closes with any other answer.
    '''
    answer = self.ShowDialog(self.Owner)  # blocks

    if answer == DialogResult.OK:
        chosen_ref = self.__issue_refs[self.__chosen_index]
        result = IssueFormResult("OK", chosen_ref)
        alt_choice = self.__coverpanel.get_alt_issue_cover_choice()
        if alt_choice:
            issue_ref, image_ref = alt_choice
            # the user chose a non-default cover image for this issue.
            # we'll store that choice in the global "session data map",
            # in case any other part of the program wants to use it.
            alt_cover_key = sstr(issue_ref.issue_key) + "-altcover"
            self.__config.session_data_map[alt_cover_key] = image_ref
        return result

    if answer == DialogResult.Cancel:
        return IssueFormResult("CANCEL")

    if answer == DialogResult.Ignore:
        # holding down control turns a skip into a permanent skip
        if self.ModifierKeys == Keys.Control:
            return IssueFormResult("PERMSKIP")
        return IssueFormResult("SKIP")

    if answer == DialogResult.Retry:
        return IssueFormResult("BACK")

    raise Exception()
def _check_magic_file(path_s):
    '''
    ComicVine implementation of the identically named method in the db.py

    Looks for a "cvinfo.txt" or "cvinfo" file in the given directory (or, if
    'path_s' is not an existing directory, in its parent directory), pulls a
    numeric series key out of that file's contents, and queries ComicVine to
    turn that key into a SeriesRef.

    path_s -> a directory path or a file path; may be None or empty.

    Returns the SeriesRef, or None if no usable cvinfo file was found or the
    series query did not report exactly one result.  Exceptions raised while
    reading the file or while querying are caught and logged.
    '''
    series_key_s = None
    file_s = None
    try:
        # 1. get the directory to search for a cvinfo file in, or None
        dir_s = path_s if path_s and Directory.Exists(path_s) else \
            Path.GetDirectoryName(path_s) if path_s else None
        dir_s = dir_s if dir_s and Directory.Exists(dir_s) else None
        if dir_s:
            # 2. search in that directory for a properly named cvinfo file
            #    note that Windows filenames are not case sensitive.
            #    file_s is overwritten on every hit, so if both files exist
            #    the last name in the list ("cvinfo") is the one used.
            for f in [dir_s + "\\" + x for x in ["cvinfo.txt", "cvinfo"]]:
                if File.Exists(f):
                    file_s = f
            # 3. if we found a file, read it's contents in, and parse the
            #    comicvine series id out of it, if possible.
            if file_s:
                with StreamReader(file_s, Encoding.UTF8, False) as sr:
                    line = sr.ReadToEnd()
                    line = line.strip() if line else line
                    # if the contents contain "49-<digits>" or
                    # "4050-<digits>" (two or more digits), keep just the
                    # digits; otherwise use the stripped contents as they are
                    match = re.match(r"^.*?\b(49|4050)-(\d{2,})\b.*$", line)
                    line = match.group(2) if match else line
                    if utils.is_number(line):
                        series_key_s = utils.sstr(int(line))
    except:
        # any failure above leaves series_key_s as None
        log.debug_exc("bad cvinfo file: " + sstr(file_s))
    # 4. did we find a series key? if so, query comicvine to build a proper
    #    SeriesRef object for that series key.
    series_ref = None
    if series_key_s:
        try:
            dom = cvconnection._query_series_details_dom(
                __api_key, utils.sstr(series_key_s))
            num_results_n = int(dom.number_of_total_results)
            # anything other than exactly one result is treated as no match
            series_ref =\
                __volume_to_seriesref(dom.results) if num_results_n==1 else None
        except:
            log.debug_exc("error getting SeriesRef for: " + sstr(series_key_s))
    # a file was found, but it didn't lead to a SeriesRef
    if file_s and not series_ref:
        log.debug("ignoring bad cvinfo file: ", sstr(file_s))
    return series_ref # may be None!
def _query_issue_id_dom(API_KEY, seriesid_s, issue_num_s):
    '''
    Queries ComicVine for a dom containing the issue ID of the given issue
    number within the given series id.

    Raises a ValueError if either the series id or the issue number is
    empty; the query itself may throw other Exceptions.
    '''
    # {0} is the series ID, an integer, and {1} is issue number, a string
    query_template_s = 'http://comicvine.com/api/issues/?api_key=' \
        + API_KEY + __CLIENTID \
        + '&format=xml&field_list=name,issue_number,id,image' \
        + '&filter=volume:{0},issue_number:{1}'

    if not (seriesid_s and issue_num_s):
        raise ValueError('bad parameters')

    series_s = sstr(seriesid_s)
    encoded_number_s = HttpUtility.UrlPathEncode(sstr(issue_num_s))
    return __get_dom(query_template_s.format(series_s, encoded_number_s))
def get_debug_string(self):
    '''
    Gets a simple little debug string summarizing this result.

    Raises an Exception if this result doesn't equal any known id.
    '''
    # results whose summary doesn't depend on the ref they carry
    fixed_summaries = (
        ("SKIP", "SKIP scraping this book"),
        ("PERMSKIP", "ALWAYS SKIP scraping this book"),
        ("CANCEL", "CANCEL this scrape operation"),
        ("SEARCH", "SEARCH AGAIN for more series"),
    )
    for id_s, summary_s in fixed_summaries:
        if self.equals(id_s):
            return summary_s

    # results whose summary names the ref they carry
    if self.equals("SHOW"):
        return "SHOW ISSUES for: '" + sstr(self.get_ref()) + "'"
    if self.equals("OK"):
        return "SCRAPE using: '" + sstr(self.get_ref()) + "'"

    raise Exception()
def handle_error(error):
    '''
    Handles the given error object (a python or .net exception) by formatting
    it nicely and then printing it to the debug log.  If the 'app_window'
    provided to the 'install' method was not None, an "unexpected error"
    message will also be displayed for the user in a modal dialog owned by
    the app_window.

    A DatabaseConnectionError with a recognized error code gets its own
    warning dialog instead of the generic one.  Nothing at all happens if
    the logger is not installed.

    This method should be an application's normal way to handle unexpected
    errors and exceptions.
    '''
    global __logger, __app_window

    if not __logger:
        return

    # if none, do current python exception. else sstr() the given exception
    if isinstance(error, Exception):
        debug("------------------- PYTHON ERROR ------------------------")
        debug_exc()  # a python exception
    else:
        debug("-------------------- .NET ERROR -------------------------")
        debug(utils.sstr(error).replace('\r', ''))  # a .NET exception

    if not __app_window:
        return

    handled = False
    if type(error) == DatabaseConnectionError:
        # if this is a DatabaseConnectionError, then it is a semi-expected
        # error that may get a special error message
        text_keys = {
            "100": "LogDBErrorApiKeyText",  # invalid api key
            "107": "LogDBErrorRateText",    # rate limit reached
            "0": "LogDBErrorText",          # generic
        }
        text_key_s = text_keys.get(error.get_error_code_s())
        if text_key_s:
            MessageBox.Show(
                __app_window,
                i18n.get(text_key_s).format(error.get_db_name_s()),
                i18n.get("LogDBErrorTitle"),
                MessageBoxButtons.OK,
                MessageBoxIcon.Warning)
            handled = True

    if not handled:
        # all other errors are considered "unexpected", and handled
        # generically; answering 'yes' calls save(True)
        result = MessageBox.Show(
            __app_window,
            i18n.get("LogErrorText"),
            i18n.get("LogErrorTitle"),
            MessageBoxButtons.YesNo,
            MessageBoxIcon.Error)
        if result == DialogResult.Yes:
            save(True)
def _query_issue_id_dom(API_KEY, seriesid_s, issue_num_s):
    '''
    Queries ComicVine for a dom containing the issue ID of the given issue
    number within the given series id.

    Raises a ValueError if either the series id or the issue number is
    empty; the query itself may throw other Exceptions.
    '''
    # {0} is the series ID, an integer, and {1} is issue number, a string
    query_template_s = 'http://comicvine.com/api/issues/?api_key=' \
        + API_KEY + __CLIENTID \
        + '&format=xml&field_list=name,issue_number,id,image' \
        + '&filter=volume:{0},issue_number:{1}'

    if not (seriesid_s and issue_num_s):
        raise ValueError('bad parameters')

    series_s = sstr(seriesid_s)
    encoded_number_s = HttpUtility.UrlPathEncode(sstr(issue_num_s))
    return __get_dom(query_template_s.format(series_s, encoded_number_s))
def __start_scrape(self, book, num_remaining):
    '''
    This method gets called once for each comic that the ScrapeEngine is
    scraping; the call happens just before the scrape begins.

    The method updates all necessary graphical components to reflect the
    current scrape.

    'book' -> the comic book object that is about to be scraped
    'num_remaining' -> the # of books left to scrape (including current one)
    '''
    # 1. obtain a nice filename string to put into our Label
    book_name = Path.GetFileName(book.path_s.strip())  # path_s is never None
    if book_name == "":
        # 1a. this is a fileless book, so build up a nice, detailed name:
        #     the series (or an "unknown" placeholder), then the issue
        #     number, then the volume year or else the publication year
        book_name = book.series_s
        if not book_name:
            book_name = "<" + i18n.get("ComicFormUnknown") + ">"
        if book.issue_num_s:
            book_name += ' #' + book.issue_num_s
        if book.volume_year_n >= 0:
            book_name += ' ({0} {1})'.format(
                i18n.get("ComicFormVolume"), sstr(book.volume_year_n))
        elif book.pub_year_n >= 0:
            book_name += ' (' + sstr(book.pub_year_n) + ')'

    # 2. obtain a copy of the first (cover) page of the book to install
    page_image = book.create_image_of_page(0)
    page_count = book.page_count_n

    # 3. install those values into the ComicForm. update progressbar.
    def install_values():
        # NOTE: now we're on the ComicForm Application Thread
        self.__current_book = book
        self.__current_page = 0
        self.__current_page_count = page_count
        self.__label.Text = i18n.get("ComicFormScrapingLabel") + book_name
        self.__pbox_panel.set_image(page_image)  # cover image may be None
        self.__progbar.PerformStep()
        self.__progbar.Maximum = self.__progbar.Value + num_remaining
        button_text_s = \
            i18n.get("ComicFormCancelButton").format(sstr(num_remaining))
        self.__cancel_button.Text = button_text_s
        self.Update()

    utils.invoke(self, install_values, False)
def _query_issue(issue_ref, slow_data):
    """
    ComicVine implementation of the identically named method in the db.py

    Builds an Issue for the given IssueRef and fills it in from the issue
    details dom.  If 'slow_data' is true, the issue's details page is also
    fetched and scraped for extra cover images and a community rating.
    """
    # interesting: can we implement a cache here? could speed things up...
    issue = Issue(issue_ref)
    key_s = sstr(issue_ref.issue_key)
    dom = cvconnection._query_issue_details_dom(__api_key, key_s)

    # each parser fills in its own part of the issue from the dom
    parsers = (
        __issue_parse_simple_stuff,
        __issue_parse_series_details,
        __issue_parse_story_credits,
        __issue_parse_summary,
        __issue_parse_roles,
    )
    for parse in parsers:
        parse(issue, dom)

    if slow_data:
        # grab extra cover images and a community rating score
        page = cvconnection._query_issue_details_page(
            __api_key, sstr(issue_ref.issue_key))
        __issue_scrape_extra_details(issue, page)

    return issue
def __massage_new_date(label, new_value, old_value, update, overwrite,
                       ignoreblanks, blank_value):
    '''
    Returns a date tuple of three ints (YYYY,MM,DD) that should be copied
    into our backing ComicBook object, IFF that tuple is not None.  The
    decision (and the resulting value, if any) is written to the debug log.

    label - a human readable description of the given date being changed.
    new_value - proposed new date to copy over. a tuple of ints (YYYY,MM,DD)
    old_value - original date.  a tuple of ints (YYYY,MM,DD)
    update - if false, this method always returns None
    overwrite - whether it's acceptable to overwrite the old value with the
                new value when the old value is non-blank.
    ignoreblanks - if true, a new date that is entirely blank is never
                copied over.
    blank_value - the value that should be considered 'blank' for any
                of the individual elements in the given date tuples.

    Raises a TypeError if the blank value is not an int, or if either date
    doesn't have three elements with an int in the last position.
    '''
    # first, a little housekeeping so that we stay really robust
    blank_date = (blank_value,) * 3
    if not new_value:
        new_value = blank_date
    if not old_value:
        old_value = blank_date

    if type(blank_value) != int:
        raise TypeError("wrong type for blank value")
    if len(old_value) != 3 or type(old_value[2]) != int:
        raise TypeError("wrong type for old value")
    if len(new_value) != 3 or type(new_value[2]) != int:
        raise TypeError("wrong type for new value")

    # skip the update unless all of the following are true:
    #  1) the update option is turned on for this particular field
    #  2) we can overwrite the existing value, or there is no existing value
    #  3) we're not overwriting with a blank value unless we're allowed to
    if not update \
            or not (overwrite or old_value == blank_date) \
            or (ignoreblanks and new_value == blank_date):
        log.debug("--> ", label.ljust(15), ": --- skipped ---")
        return None

    # a '*' marks values that are actually changing
    marker = '*' if old_value != new_value else ' '
    if new_value == blank_date:
        log.debug("--> ", marker, label.ljust(15), ": ")
    else:
        date_s = '-'.join(
            ['??' if x == blank_value else sstr(x) for x in new_value])
        log.debug("--> ", marker, label.ljust(15), ": ", date_s)
    return new_value
def __unique_series_s(self):
    '''
    Gets the unique series name for this ComicBook.  This is a special
    string that will be identical for (and only for) any comic books that
    "appear" to be from the same series.

    The unique series name is meant to be used internally (i.e. the key for
    a map, or for grouping ComicBooks), not for displaying to users.

    This value is NOT the same as the series_s property.
    '''
    bd = self.__bookdata

    # the name part: series plus format, with everything except word
    # characters removed, in lower case
    sname = bd.series_s if bd.series_s else ''
    if sname and bd.format_s:
        sname += bd.format_s
    sname = re.sub(r'\W+', '', sname).lower()

    svolume = ''
    if not sname:
        # if we can't find a name at all (very weird), fall back to the
        # memory ID, which is unique and thus ensures that this comic
        # doesn't get lumped in to the same series choice as any other
        # unnamed comics!
        sname = "uniqueid-" + utils.sstr(id(self))
    elif bd.volume_year_n and bd.volume_year_n > 0:
        svolume = sstr(bd.volume_year_n)

    # generate a hash to add onto the string.  the hash should be identical
    # for all comics that belong to the same series, and different
    # otherwise.  note how by default, comics that are in different
    # directories are always considered to belong to different series.
    folder_s = Path.GetDirectoryName(bd.path_s) if bd.path_s else None
    folder_s = folder_s or ''
    if self.__scraper.config.ignore_folders_b:
        seed_s = svolume
    else:
        seed_s = folder_s + svolume

    suffix_s = seed_s
    if seed_s:
        with MD5.Create() as md5:
            digest = md5.ComputeHash(Encoding.UTF8.GetBytes(seed_s))
            # only the first five bytes of the digest are used
            suffix_s = ''.join(["%02X" % x for x in digest[:5]]).strip()
    return sname + suffix_s
def __start_scrape(self, book, num_remaining):
    '''
    This method gets called once for each comic that the ScrapeEngine is
    scraping; the call happens just before the scrape begins.

    The method updates all necessary graphical components to reflect the
    current scrape.

    'book' -> the comic book object that is about to be scraped
    'num_remaining' -> the # of books left to scrape (including current one)
    '''
    # 1. obtain a nice filename string to put into our Label
    book_name = Path.GetFileName(book.path_s.strip())  # path_s is never None
    if book_name == "":
        # 1a. this is a fileless book, so build up a nice, detailed name:
        #     the series (or an "unknown" placeholder), then the issue
        #     number, then the volume year or else the publication year
        book_name = book.series_s
        if not book_name:
            book_name = "<" + i18n.get("ComicFormUnknown") + ">"
        if book.issue_num_s:
            book_name += ' #' + book.issue_num_s
        if book.volume_year_n >= 0:
            book_name += ' ({0} {1})'.format(
                i18n.get("ComicFormVolume"), sstr(book.volume_year_n))
        elif book.pub_year_n >= 0:
            book_name += ' (' + sstr(book.pub_year_n) + ')'

    # 2. obtain a copy of the first (cover) page of the book to install
    page_image = book.create_image_of_page(0)
    page_count = book.page_count_n

    # 3. install those values into the ComicForm. update progressbar.
    def install_values():
        # NOTE: now we're on the ComicForm Application Thread
        self.__current_book = book
        self.__current_page = 0
        self.__current_page_count = page_count
        self.__label.Text = i18n.get("ComicFormScrapingLabel") + book_name
        self.__pbox_panel.set_image(page_image)  # cover image may be None
        self.__progbar.PerformStep()
        self.__progbar.Maximum = self.__progbar.Value + num_remaining
        button_text_s = \
            i18n.get("ComicFormCancelButton").format(sstr(num_remaining))
        self.__cancel_button.Text = button_text_s
        self.Update()

    utils.invoke(self, install_values, False)
def __massage_new_date(label, new_value, old_value, update, overwrite,
                       ignoreblanks, blank_value):
    '''
    Returns a date tuple of three ints (YYYY,MM,DD) that should be copied
    into our backing ComicBook object, IFF that tuple is not None.  The
    decision (and the resulting value, if any) is written to the debug log.

    label - a human readable description of the given date being changed.
    new_value - proposed new date to copy over. a tuple of ints (YYYY,MM,DD)
    old_value - original date.  a tuple of ints (YYYY,MM,DD)
    update - if false, this method always returns None
    overwrite - whether it's acceptable to overwrite the old value with the
                new value when the old value is non-blank.
    ignoreblanks - if true, a new date that is entirely blank is never
                copied over.
    blank_value - the value that should be considered 'blank' for any
                of the individual elements in the given date tuples.

    Raises a TypeError if the blank value is not an int, or if either date
    doesn't have three elements with an int in the last position.
    '''
    # first, a little housekeeping so that we stay really robust
    blank_date = (blank_value,) * 3
    if not new_value:
        new_value = blank_date
    if not old_value:
        old_value = blank_date

    if type(blank_value) != int:
        raise TypeError("wrong type for blank value")
    if len(old_value) != 3 or type(old_value[2]) != int:
        raise TypeError("wrong type for old value")
    if len(new_value) != 3 or type(new_value[2]) != int:
        raise TypeError("wrong type for new value")

    # skip the update unless all of the following are true:
    #  1) the update option is turned on for this particular field
    #  2) we can overwrite the existing value, or there is no existing value
    #  3) we're not overwriting with a blank value unless we're allowed to
    if not update \
            or not (overwrite or old_value == blank_date) \
            or (ignoreblanks and new_value == blank_date):
        log.debug("--> ", label.ljust(15), ": --- skipped ---")
        return None

    # a '*' marks values that are actually changing
    marker = '*' if old_value != new_value else ' '
    if new_value == blank_date:
        log.debug("--> ", marker, label.ljust(15), ": ")
    else:
        date_s = '-'.join(
            ['??' if x == blank_value else sstr(x) for x in new_value])
        log.debug("--> ", marker, label.ljust(15), ": ", date_s)
    return new_value
def set_status(self, status):
    '''
    Changes the search status of this _ButtonModel.  Must be one of:

    'not-searched' : user has not requested a search for more image refs
    'searching' : a background thread is currently searching for image refs
    'searched' : the search for more image refs is now complete

    Raises an Exception (leaving the current status untouched) if any other
    value is given.
    '''
    if status not in ('not-searched', 'searching', 'searched'):
        # build ONE message string; passing the pieces to Exception as
        # separate arguments would make the message display as a tuple
        raise Exception("bad status received: " + sstr(status))
    self.__status = status
def set_status(self, status):
    '''
    Changes the search status of this _ButtonModel.  Must be one of:

    'not-searched' : user has not requested a search for more image refs
    'searching' : a background thread is currently searching for image refs
    'searched' : the search for more image refs is now complete

    Raises an Exception (leaving the current status untouched) if any other
    value is given.
    '''
    if status not in ('not-searched', 'searching', 'searched'):
        # build ONE message string; passing the pieces to Exception as
        # separate arguments would make the message display as a tuple
        raise Exception("bad status received: " + sstr(status))
    self.__status = status
def record_choice(self, series_ref):
    '''
    Records the fact that the given SeriesRef was selected by the user.
    Future MatchScore objects will have this information, which they can
    use to compute more accurate scores.

    Nothing happens if 'series_ref' is empty, has an empty key, or has a
    key that was already recorded.  Otherwise the key is added to the set
    of prior choices, which is then persisted to Resources.SERIES_FILE.
    '''
    known_sl = self.__prior_series_sl
    key_s = ""
    if series_ref:
        key_s = sstr(series_ref.series_key)

    if not key_s or key_s in known_sl:
        return

    known_sl.add(key_s)
    # the persisted map simply maps every known key onto itself
    utils.persist_map(dict((x, x) for x in known_sl), Resources.SERIES_FILE)
def record_choice(self, series_ref):
    '''
    Remembers that the user picked the given SeriesRef.

    The ref's series key (as a string) is added to this object's collection
    of previously chosen series, and the whole collection is then written
    out via utils.persist_map to Resources.SERIES_FILE.  Nothing happens if
    series_ref is falsy, its key converts to an empty string, or the key
    has already been recorded.  Future MatchScore objects are meant to use
    this information to compute more accurate scores.
    '''
    chosen_sl = self.__prior_series_sl
    if series_ref:
        key_s = sstr(series_ref.series_key)
    else:
        key_s = ""

    is_new_key = bool(key_s) and key_s not in chosen_sl
    if is_new_key:
        chosen_sl.add(key_s)
        # persist_map is handed a map, so each key is simply mapped to itself
        identity_map = dict(zip(chosen_sl, chosen_sl))
        utils.persist_map(identity_map, Resources.SERIES_FILE)
def __unique_series_s(self):
    '''
    Gets the unique series name for this ComicBook.  This is a special
    string that will be identical for (and only for) any comic books that
    "appear" to be from the same series.

    The unique series name is meant to be used internally (i.e. the key for
    a map, or for grouping ComicBooks), not for displaying to users.  This
    value is NOT the same as the series_s property.

    The result is the lower-cased series name (plus format, if any) with all
    non-word characters removed, followed by up to 10 hex digits taken from
    an MD5 hash of the book's folder and/or volume year (when there is
    anything to hash).
    '''

    bd = self.__bookdata
    sname = bd.series_s if bd.series_s else ''
    if sname and bd.format_s:
        sname += bd.format_s
    # raw string: '\W' is not a valid escape in an ordinary string literal
    sname = re.sub(r'\W+', '', sname).lower()

    svolume = ''
    if sname:
        if bd.volume_year_n and bd.volume_year_n > 0:
            svolume = sstr(bd.volume_year_n)
    else:
        # if we can't find a name at all (very weird), fall back to the
        # memory ID, which is unique and thus ensures that this comic
        # doesn't get lumped in to the same series choice as any other
        # unnamed comics!
        sname = "uniqueid-" + utils.sstr(id(self))

    # generate a hash to add onto the string.  the hash should be identical
    # for all comics that belong to the same series, and different
    # otherwise.  note how, by default, comics that are in different
    # directories are always considered to belong to different series.
    location = Path.GetDirectoryName(bd.path_s) if bd.path_s else None
    location = location if location else ''
    if self.__scraper.config.ignore_folders_b:
        hash_s = svolume
    else:
        hash_s = location + svolume

    if hash_s:
        with MD5.Create() as md5:
            digest = md5.ComputeHash(Encoding.UTF8.GetBytes(hash_s))
            # only the first 5 bytes of the digest are kept
            hash_s = ''.join(["%02X" % x for x in digest[:5]]).strip()
    return sname + hash_s
def delegate():
    '''
    Refreshes this form's state and widgets for the book that is now being
    scraped.  Takes no arguments: 'self', 'book', 'book_name', 'page_count',
    'page_image' and 'num_remaining' are not defined here, so they must be
    supplied by the enclosing scope.
    '''
    # NOTE: now we're on the ComicForm Application Thread
    self.__current_book = book
    self.__current_page = 0  # page index is reset for the new book
    self.__current_page_count = page_count
    self.__label.Text = i18n.get("ComicFormScrapingLabel") + book_name
    self.__pbox_panel.set_image(page_image) # cover image may be None
    # advance the bar first, then re-derive its maximum from the new value
    # plus the number of books still remaining
    self.__progbar.PerformStep()
    self.__progbar.Maximum = self.__progbar.Value + num_remaining
    # the cancel button's label includes the number of remaining books
    self.__cancel_button.Text=\
        i18n.get("ComicFormCancelButton").format(sstr(num_remaining))
    self.Update()
def _query_issue(issue_ref, slow_data):
    '''
    ComicVine implementation of the identically named method in the db.py

    Builds an Issue from the given issue_ref, fills it in from the issue
    details dom, and returns it.  When slow_data is true, the issue details
    page is fetched as well and used to fill in extra details.
    '''

    # interesting: can we implement a cache here?  could speed things up...
    issue = Issue(issue_ref)

    dom = cvconnection._query_issue_details_dom(
        __api_key, sstr(issue_ref.issue_key))

    # each parser copies one group of details out of the dom, in this order
    dom_parsers = (
        __issue_parse_simple_stuff,
        __issue_parse_series_details,
        __issue_parse_story_credits,
        __issue_parse_summary,
        __issue_parse_roles,
    )
    for parse in dom_parsers:
        parse(issue, dom)

    if not slow_data:
        return issue

    # grab extra cover images and a community rating score
    page = cvconnection._query_issue_details_page(
        __api_key, sstr(issue_ref.issue_key))
    __issue_scrape_extra_details(issue, page)
    return issue
def __init__(self, issue_num_s, issue_key, title_s, thumb_url_s):
    '''
    Initializes a newly created IssueRef, checking the given parameters to
    make sure they are legal, and then storing them as read-only properties.

    issue_num_s --> a string describing this comic's issue number (which may
       not be a number at all; it can be '' or '1A' or 'A', etc.)  It cannot
       be None.  It is stored with surrounding whitespace removed.

    issue_key --> a database specific object (i.e. the 'memento' design
       pattern) that can be used by the database at a later date to
       unambiguously identify this comic book issue.  It must be truthy and
       must not convert to a blank string, so it should have a useful
       __str__ method.  It should also be unique for each comic book issue.

    title_s --> a string describing the title of this comic book issue.  if
       no title is available, pass in "" here.  Non-string values are
       stored as "".

    thumb_url_s --> the (http) url of an appropriate thumbnail image for
       this comic book issue (usually the cover.)  if no image is available,
       pass in None here.

    Raises an Exception if issue_key or issue_num_s is not legal.
    '''

    key_is_blank = not issue_key or not sstr(issue_key).strip()
    if key_is_blank or issue_num_s is None:
        raise Exception()

    self.__issue_key = issue_key
    self.__issue_num_s = sstr(issue_num_s).strip()

    if utils.is_string(title_s):
        self.__title_s = title_s
    else:
        self.__title_s = ""

    # the stored thumbnail url is either a non-empty string or None,
    # never ''
    thumb_s = sstr(thumb_url_s).strip() if thumb_url_s else None
    self.__thumb_url_s = thumb_s if thumb_s else None

    # used only for comparisons
    self._cmpkey_s = sstr(self.issue_key)
def __init__(self, crbook, scraper):
    '''
    Construct a new PluginBookData.

    'crbook' is one of the ComicBook objects that ComicRack directly passes
    to its plugin scripts.  Its field values are copied into this object.
    'scraper' is stored on this object alongside crbook.
    NOTE(review): presumably the scrape engine driving this run -- confirm.

    Raises an Exception if the type name of crbook does not contain
    "ComicBook".
    '''
    super(PluginBookData, self).__init__()
    if "ComicBook" not in utils.sstr(type(crbook)):
        raise Exception("invalid backing ComicBook")

    # a quick function to make splitting ComicRack comicbook fields easier
    def split(s):
        return s.split(",") if s else []

    # load our own copy of all data from the ComicRack database

    # don't use shadows for these 3!  we'll parse these values from the
    # comic's filename ourself if they are not present!
    self.series_s = crbook.Series
    self.issue_num_s = crbook.Number
    self.pub_year_n = crbook.Year

    self.pub_month_n = crbook.Month
    self.pub_day_n = crbook.Day
    self.rel_year_n = crbook.ReleasedTime.Year
    self.rel_month_n = crbook.ReleasedTime.Month
    self.rel_day_n = crbook.ReleasedTime.Day
    self.volume_year_n = crbook.ShadowVolume
    self.format_s = crbook.ShadowFormat
    self.title_s = crbook.Title
    self.crossovers_sl = split(crbook.AlternateSeries)
    self.summary_s = crbook.Summary
    self.publisher_s = crbook.Publisher
    self.imprint_s = crbook.Imprint
    self.characters_sl = split(crbook.Characters)
    self.teams_sl = split(crbook.Teams)
    self.locations_sl = split(crbook.Locations)
    self.writers_sl = split(crbook.Writer)
    self.pencillers_sl = split(crbook.Penciller)
    self.inkers_sl = split(crbook.Inker)
    self.colorists_sl = split(crbook.Colorist)
    self.letterers_sl = split(crbook.Letterer)
    self.cover_artists_sl = split(crbook.CoverArtist)
    self.editors_sl = split(crbook.Editor)
    self.tags_sl = split(crbook.Tags)
    self.notes_s = crbook.Notes
    self.path_s = crbook.FilePath
    self.webpage_s = crbook.Web
    self.rating_n = crbook.CommunityRating
    self.page_count_n = crbook.PageCount

    # the scraper's own keys are kept in ComicRack custom values
    self.issue_key_s = crbook.GetCustomValue(PluginBookData.__ISSUE_KEY)
    self.series_key_s = crbook.GetCustomValue(PluginBookData.__SERIES_KEY)

    self.__crbook = crbook
    self.__scraper = scraper
def __init__(self, issue_num_s, issue_key, title_s, thumb_url_s):
    '''
    Initializes a newly created IssueRef, checking the given parameters to
    make sure they are legal, and then storing them as read-only properties.

    issue_num_s --> a string describing this comic's issue number (which may
       not be a number at all; it can be '' or '1A' or 'A', etc.)  It cannot
       be None.  It is stored with surrounding whitespace removed.

    issue_key --> a database specific object (i.e. the 'memento' design
       pattern) that can be used by the database at a later date to
       unambiguously identify this comic book issue.  It must be truthy and
       must not convert to a blank string, so it should have a useful
       __str__ method.  It should also be unique for each comic book issue.

    title_s --> a string describing the title of this comic book issue.  if
       no title is available, pass in "" here.  Non-string values are
       stored as "".

    thumb_url_s --> the (http) url of an appropriate thumbnail image for
       this comic book issue (usually the cover.)  if no image is available,
       pass in None here.

    Raises an Exception if issue_key or issue_num_s is not legal.
    '''

    # reject illegal keys and issue numbers before storing anything
    if not issue_key or not sstr(issue_key).strip() or issue_num_s is None:
        raise Exception()

    self.__issue_key = issue_key
    self.__issue_num_s = sstr(issue_num_s).strip()
    self.__title_s = "" if not utils.is_string(title_s) else title_s

    # make sure the stored thumbnail url is either valid, or None
    # (but never '')
    cleaned_url_s = None
    if thumb_url_s:
        cleaned_url_s = sstr(thumb_url_s).strip()
        if cleaned_url_s == '':
            cleaned_url_s = None
    self.__thumb_url_s = cleaned_url_s

    # used only for comparisons
    self._cmpkey_s = sstr(self.issue_key)
def get_debug_string(self):
    """
    Gets a simple little debug string summarizing this result.

    Raises an Exception if this result doesn't equal any of the known
    result names (SKIP, PERMSKIP, CANCEL, BACK or OK).
    """
    # results whose summary is a fixed piece of text, in the order checked
    fixed_summaries = (
        ("SKIP", "SKIP scraping this book"),
        ("PERMSKIP", "ALWAYS SKIP scraping this book"),
        ("CANCEL", "CANCEL this scrape operation"),
        ("BACK", "GO BACK to the series dialog"),
    )
    for name, summary in fixed_summaries:
        if self.equals(name):
            return summary

    # an OK result also reports the ref that it carries
    if self.equals("OK"):
        return "SCRAPE using: '" + sstr(self.get_ref()) + "'"

    raise Exception()
def get_debug_string(self):
    '''
    Gets a simple little debug string summarizing this result.

    Raises an Exception if this result doesn't equal any of the known
    result names (SKIP, PERMSKIP, CANCEL, BACK or OK).
    '''
    if self.equals("SKIP"):
        return "SKIP scraping this book"
    if self.equals("PERMSKIP"):
        return "ALWAYS SKIP scraping this book"
    if self.equals("CANCEL"):
        return "CANCEL this scrape operation"
    if self.equals("BACK"):
        return "GO BACK to the series dialog"
    if not self.equals("OK"):
        raise Exception()

    # an OK result also reports the ref that it carries
    ref_s = sstr(self.get_ref())
    return "SCRAPE using: '" + ref_s + "'"
def _query_issue_details_dom(API_KEY, issueid_s):
    """
    Performs a query that will obtain a dom containing the ComicVine API
    details for the given issue.

    Never returns null, but may throw exceptions if there are problems.
    In particular, raises ValueError if issueid_s is None or "".
    """

    # {0} is the issue ID
    url_template = (
        "http://comicvine.com/api/issue/4000-{0}/?api_key="
        + API_KEY + __CLIENTID + "&format=xml"
    )

    id_missing = issueid_s is None or issueid_s == ""
    if id_missing:
        raise ValueError("bad parameters")

    return __get_dom(url_template.format(sstr(issueid_s)))
def _query_issue_id_dom(API_KEY, seriesid_s, issue_num_s):
    '''
    Performs a query that will obtain a dom containing the issue ID for the
    given issue number in the given series id.

    This method doesn't return null, but it may throw Exceptions.  In
    particular, it raises ValueError if seriesid_s is falsy, or if
    issue_num_s is blank once converted to a string and stripped.
    '''

    # {0} is the series ID, an integer, and {1} is issue number, a string
    # NOTE(review): this host differs from the 'comicvine.com' host used by
    # the other queries in this file -- confirm which one is current.
    url_template = (
        'http://comicvine.gamespot.com/api/issues/?api_key=' + API_KEY
        + __CLIENTID + '&format=xml&field_list=name,issue_number,id,image'
        + '&filter=volume:{0},issue_number:{1}'
    )

    # cv does not play well with leading zeros in issue nums. see issue #403.
    number_s = sstr(issue_num_s).strip()
    if number_s:
        # fix issue 411: a number made up only of zeros collapses to '0'
        number_s = number_s.lstrip('0').strip() or '0'

    if not (seriesid_s and number_s):
        raise ValueError('bad parameters')

    series_part = sstr(seriesid_s)
    number_part = HttpUtility.UrlPathEncode(sstr(number_s))
    return __get_dom(url_template.format(series_part, number_part))
def _query_issue_details_dom(API_KEY, issueid_s):
    '''
    Performs a query that will obtain a dom containing the ComicVine API
    details for the given issue.

    Never returns null, but may throw exceptions if there are problems.
    In particular, raises ValueError if issueid_s is None or ''.
    '''

    # {0} is the issue ID
    base_url = 'http://comicvine.com/api/issue/4000-{0}/?api_key='
    url_template = base_url + API_KEY + __CLIENTID + '&format=xml'

    if issueid_s is None or issueid_s == '':
        raise ValueError('bad parameters')

    issue_url = url_template.format(sstr(issueid_s))
    dom = __get_dom(issue_url)
    return dom
def _query_series_details_dom(API_KEY, seriesid_s):
    '''
    Performs a query that will obtain a dom containing the start year and
    publisher (along with the other requested fields) for the given
    series ID.

    This method doesn't return null, but it may throw Exceptions.  In
    particular, it raises ValueError if seriesid_s is None or ''.
    '''

    # {0} is the series id, an integer.
    # parsing relies on 'field_list' specifying 2 or more elements!!
    url_template = (
        'http://comicvine.com/api/volume/4050-{0}/?api_key='
        + API_KEY + __CLIENTID + '&format=xml'
        + '&field_list=name,start_year,publisher,image,count_of_issues,id'
    )

    id_missing = seriesid_s is None or seriesid_s == ''
    if id_missing:
        raise ValueError('bad parameters')

    return __get_dom(url_template.format(sstr(seriesid_s)))
def _query_series_details_dom(API_KEY, seriesid_s):
    '''
    Performs a query that will obtain a dom containing the start year and
    publisher (along with the other requested fields) for the given
    series ID.

    This method doesn't return null, but it may throw Exceptions.  In
    particular, it raises ValueError if seriesid_s is None or ''.
    '''

    # {0} is the series id, an integer.
    base_url = 'http://comicvine.com/api/volume/4050-{0}/?api_key='
    # parsing relies on 'field_list' specifying 2 or more elements!!
    field_list = \
        '&field_list=name,start_year,publisher,image,count_of_issues,id'
    url_template = base_url + API_KEY + __CLIENTID + '&format=xml' + field_list

    if seriesid_s is None or seriesid_s == '':
        raise ValueError('bad parameters')

    series_url = url_template.format(sstr(seriesid_s))
    return __get_dom(series_url)
def runTest(self):
    '''
    Checks to see if the filename for this test parses correctly.

    The test data is read as (filename, expected series, expected issue
    number, expected year).  The test fails if extract() raises, or if any
    of the three values it returns differs from the expected one.
    '''

    expected_series = self.__testdata[1]
    expected_issue_num = self.__testdata[2]
    expected_year = self.__testdata[3]
    filename = self.__testdata[0]

    try:
        actual_series, actual_issue_num, actual_year = extract(filename)
    except Exception as e:
        # fail() always raises, so the assertions below are never reached
        # with unassigned 'actual' values
        self.fail("Unexpected error parsing: " + filename + "\n"
                  + utils.sstr(e))

    # convert every parsed value with sstr: a non-string result must show
    # up as a failed comparison below, not as a TypeError raised while
    # building this message
    error = 'error parsing filename "' + filename + '"\n -->' + \
        'got series "' + utils.sstr(actual_series) + \
        '", issue "' + utils.sstr(actual_issue_num) + \
        '" and year "' + utils.sstr(actual_year) + '"'

    self.assertEqual(expected_series, actual_series, error)
    self.assertEqual(expected_issue_num, actual_issue_num, error)
    self.assertEqual(expected_year, actual_year, error)
def __debug_raw(self, message=''):
    """
    Records the given message, and writes it out to the 'real' stdout.

    message - the value to log; it is converted to a string with
       utils.sstr.  If that conversion fails, a fixed error marker is
       logged in its place.

    Raises an Exception if the list of log lines has not been set up yet.
    The mutex is released again whether or not an exception occurs.
    """

    # protect access to the logLines with a mutex (for multiple threads)
    self._mutex.WaitOne(-1)
    try:
        if self._loglines is None:
            raise Exception("you must install the __Logger before using it")

        try:
            output_line = utils.sstr(message)
        except:
            # shouldn't happen!  deliberately catches everything, so that a
            # bad message can never break logging itself
            output_line = "***** LOGGING ERROR *****"

        self._loglines.append(output_line)
        sys.__stdout__.write(output_line)
    finally:
        self._mutex.ReleaseMutex()
def debug_exc(self, message):
    """
    Implements the module-level debug_exc() method.

    Logs the given message (unless it is None or blank), then the name and
    value of the exception reported by sys.exc_info(), then the formatted
    trace lines, and finally an empty debug() call.  Any failure while
    formatting either section is replaced by a placeholder log line instead
    of being raised.
    """

    if message is not None and message.strip():
        self.debug(message)

    try:
        exc_type, exc_value = sys.exc_info()[:2]
        headline = 'Caught ' + exc_type.__name__ + ': ' + \
            utils.sstr(exc_value)
        self.debug(headline)
    except:
        # best effort only: logging must never raise from here
        self.debug(": Exception name couldn't be formatted :")

    try:
        self.debug("Traceback (most recent call last):")
        for trace_line in self.__get_trace():
            formatted = self.__format_trace_line(trace_line)
            self.debug(formatted)
    except:
        # best effort only: logging must never raise from here
        self.debug(": Traceback couldn't be formatted :")

    self.debug()