def _create_key_tag_s(issue_key):
    '''
    ComicVine implementation of the identically named method in the db.py

    Builds the tag "CVDB<n>", where <n> is the given issue key converted to
    an integer.  If the key cannot be converted, the problem is logged and
    None is returned instead.
    '''
    try:
        key_n = int(issue_key)
        tag_s = "CVDB" + utils.sstr(key_n)
    except:
        log.debug_exc("Couldn't create key tag out of: " + sstr(issue_key))
        tag_s = None
    return tag_s
def __issue_scrape_extra_details(issue, page):
    '''
    Parse additional details from the issues ComicVine webpage.

    'page' is the html text of the webpage; nothing happens if it is empty or
    None.  Alternate cover image urls found in the page are appended to
    issue.image_urls_sl, and a community rating greater than zero is stored
    in issue.rating_n.
    '''
    if not page:
        return

    # first pass: find all the alternate cover image urls.  the first cover
    # div found in the page is deliberately skipped.
    cover_div_regex = re.compile(
        r'(?mis)\<\s*div[^\>]*img imgboxart issue-cover[^\>]+\>(.*?)div\s*>')
    cover_divs = re.findall(cover_div_regex, page)
    for div_s in cover_divs[1:]:
        img_match = re.search(r'(?i)\<\s*img\s+.*src\s*=\s*"([^"]*)', div_s)
        if not img_match:
            continue
        image_url_s = img_match.group(1)
        if image_url_s:
            issue.image_urls_sl.append(image_url_s)

    # second pass: find the community rating (stars) for this comic
    rating_regex = re.compile(
        r'(?mis)\<span class="average-score"\>(\d+\.?\d*) stars?\</span\>')
    rating_match = re.search(rating_regex, page)
    if rating_match:
        try:
            rating = float(rating_match.group(1))
            # a rating of zero is not stored on the issue
            if rating > 0:
                issue.rating_n = rating
        except:
            log.debug_exc("Error parsing rating for " + sstr(issue) + ": ")
def regex(filename_s, regex_s):
    '''
    Applies the given regular expression to the filename of a comic book and
    pulls the series name, issue number and issue year out of its named
    groups "series", "num" and "year".

    Returns the triple (series, num, year), i.e. ("batman", "344", "2004").
    A "num" or "year" group that is missing or blank comes back as "".
    Returns None if the expression doesn't match at the start of the
    filename, or if no non-blank "series" group was captured.

    If applying the expression raises an error, the error is logged, the
    expression is remembered in the module level __failed_regex, and later
    calls with that same expression return None without trying it again.
    '''
    global __failed_regex

    # don't retry the expression that most recently blew up
    if regex_s == __failed_regex:
        return None

    try:
        match = re.match(regex_s, filename_s)
        if not match:
            return None

        # collect only the named groups that captured some non-blank text
        found = {}
        for group_name in match.groupdict():
            captured_s = match.group(group_name)
            if captured_s and captured_s.strip():
                found[group_name] = captured_s

        if "series" not in found:
            return None

        series_s = match.group("series")
        num_s = match.group("num") if "num" in found else ""
        year_s = match.group("year") if "year" in found else ""
        return (series_s, num_s, year_s)
    except:
        log.debug_exc("regex filename parsing failed:")
        __failed_regex = regex_s
        return None
def persist_map(map, file):
    """
    Writes the given map of strings-to-values into a file, by converting all
    of its keys and values into strings.

    The file starts with a ":: This file was generated on ..." header line,
    followed by one "key : value" line per entry, ordered by sorted key.
    Any key value pair that contains the ':' character will not be written
    out (a warning is logged instead).  All other contents that were in the
    given file will be destroyed.

    Returns True on success, False on failure (the failure is logged).
    """
    # imported ahead of the 'try' so that the error handler below can always
    # rely on 'log' being bound
    import log
    try:
        with StreamWriter(file, False, Encoding.UTF8) as sw:
            sw.Write(":: This file was generated on "
                     + strftime(r'%Y.%m.%d %X') + "\n\n")
            # sorted() copes with whatever keys() returns (list, view or
            # iterator), unlike calling .sort() on the result of keys()
            for key in sorted(map.keys()):
                value = sstr(map[key]).strip()
                key = sstr(key).strip()
                if ':' in key or ':' in value:
                    # ':' is the key/value separator, so such an entry
                    # could not be read back in again
                    log.debug(
                        "WARNING: can't write map entry containing ':'; ",
                        key, " -> ", value)
                else:
                    sw.Write(key + ' : ' + value + "\n")
        return True
    except:
        log.debug_exc("problem persisting map to file: " + sstr(file))
        return False
def _create_key_tag_s(issue_key):
    """
    ComicVine implementation of the identically named method in the db.py

    Returns the string "CVDB" followed by the integer form of the given
    issue key, or None (after logging the error) when the key can't be
    turned into an integer.
    """
    key_tag_s = None
    try:
        number_s = utils.sstr(int(issue_key))
        key_tag_s = "CVDB" + number_s
    except:
        log.debug_exc("Couldn't create key tag out of: " + sstr(issue_key))
    return key_tag_s
def persist_map(map, file):
    """
    Writes the given map of strings-to-values into a file, by converting all
    of its keys and values into strings.

    The file starts with a ":: This file was generated on ..." header line,
    followed by one "key : value" line per entry, ordered by sorted key.
    Any key value pair that contains the ':' character will not be written
    out (a warning is logged instead).  All other contents that were in the
    given file will be destroyed.

    Returns True on success, False on failure (the failure is logged).
    """
    # imported ahead of the 'try' so that the error handler below can always
    # rely on 'log' being bound
    import log
    try:
        with StreamWriter(file, False, Encoding.UTF8) as sw:
            sw.Write(":: This file was generated on "
                     + strftime(r'%Y.%m.%d %X') + "\n\n")
            # sorted() copes with whatever keys() returns (list, view or
            # iterator), unlike calling .sort() on the result of keys()
            for key in sorted(map.keys()):
                value = sstr(map[key]).strip()
                key = sstr(key).strip()
                if ':' in key or ':' in value:
                    # ':' is the key/value separator, so such an entry
                    # could not be read back in again
                    log.debug("WARNING: can't write map entry containing ':'; ",
                              key, " -> ", value)
                else:
                    sw.Write(key + ' : ' + value + "\n")
        return True
    except:
        log.debug_exc("problem persisting map to file: " + sstr(file))
        return False
def __validate_environment():
    '''
    Checks to see if the current environment is valid to run this script in.
    If it is not, a warning is logged and an error message box is displayed
    to explain the problem.

    The check compares the first three dot-separated parts of
    ComicRack.App.ProductVersion against the minimum required version.  If
    the version can't be read or parsed, the problem is logged and the
    environment is treated as valid.

    Returns True if the current environment is valid, False if it is not.
    '''
    # the minimum versions required for a valid environment
    REQUIRED_MAJOR = 0
    REQUIRED_MINOR = 9
    REQUIRED_BUILD = 165
    required = (REQUIRED_MAJOR, REQUIRED_MINOR, REQUIRED_BUILD)

    valid_environment = True
    try:
        version = re.split(r'\.', ComicRack.App.ProductVersion)

        # compare the parts as a tuple of integers.  gluing the build number
        # on as a decimal fraction (i.e. "9.165") misorders builds that have
        # a different number of digits: 0.9.90 would look newer than 0.9.165,
        # and 0.9.1000 would look older.
        current = (int(version[0]), int(version[1]), int(version[2]))
        valid_environment = current >= required

        if not valid_environment:
            log.debug("WARNING: script requires ComicRack ", REQUIRED_MAJOR,
                      '.', REQUIRED_MINOR, '.', REQUIRED_BUILD,
                      ' or higher. Exiting...')
            MessageBox.Show(ComicRack.MainWindow,
                            i18n.get("ComicRackOODText"),
                            i18n.get("ComicRackOODTitle"),
                            MessageBoxButtons.OK, MessageBoxIcon.Warning)
    except:
        # best effort only: never block the script because of a version
        # string that we couldn't make sense of
        log.debug_exc("WARNING: couldn't validate comicrack version")
        valid_environment = True
    return valid_environment
def _check_magic_file(path_s):
    '''
    ComicVine implementation of the identically named method in the db.py

    Looks for a file called "cvinfo" or "cvinfo.txt" in the given directory
    (or in the directory containing the given file), and passes its stripped
    contents to __url_to_seriesref.  Returns whatever that produces, or None
    if no usable file was found or an error occurred.
    '''
    series_ref = None
    file_s = None
    try:
        # 1. get the directory to search for a cvinfo file in, or None
        if not path_s:
            dir_s = None
        elif Directory.Exists(path_s):
            dir_s = path_s
        else:
            dir_s = Path.GetDirectoryName(path_s)
        if not (dir_s and Directory.Exists(dir_s)):
            dir_s = None

        if dir_s:
            # 2. search in that directory for a properly named cvinfo file
            #    note that Windows filenames are not case sensitive.  if both
            #    names exist, the one checked last ("cvinfo") is used.
            for name_s in ("cvinfo.txt", "cvinfo"):
                candidate_s = dir_s + "\\" + name_s
                if File.Exists(candidate_s):
                    file_s = candidate_s

            # 3. if we found a file, read its contents in, and parse the
            #    comicvine series reference out of it, if possible.
            if file_s:
                with StreamReader(file_s, Encoding.UTF8, False) as sr:
                    line = sr.ReadToEnd()
                    if line:
                        line = line.strip()
                    series_ref = __url_to_seriesref(line)
    except:
        log.debug_exc("bad cvinfo file: " + sstr(file_s))

    if file_s and not series_ref:
        log.debug("ignoring bad cvinfo file: ", sstr(file_s))

    return series_ref  # may be None!
def regex(filename_s, regex_s):
    '''
    Takes the filename of a comic book, and extracts three strings out of it
    using the given regular expression, which is matched against the start of
    the filename and should define regex groups called "series", "num", and
    "year".

    The result is the triple (series name, issue number, issue year), i.e.
    ("batman", "344", "2004").  The issue number and year are "" when their
    group is absent or captured only blank text.  None is returned when the
    expression does not match, or when it captured no non-blank series name.

    An expression that raises an error is logged and stored in the module
    level __failed_regex; while it is stored there, calls that pass the same
    expression return None immediately.
    '''
    global __failed_regex
    results = None

    if regex_s == __failed_regex:
        # this expression already failed once; don't bother trying it again
        return results

    try:
        match = re.match(regex_s, filename_s)
        if match:
            # names of the groups that captured some non-blank text
            usable = [name for name in match.groupdict()
                      if match.group(name) and match.group(name).strip()]
            if "series" in usable:
                series_s = match.group("series")
                num_s = ""
                if "num" in usable:
                    num_s = match.group("num")
                year_s = ""
                if "year" in usable:
                    year_s = match.group("year")
                results = (series_s, num_s, year_s)
    except:
        log.debug_exc("regex filename parsing failed:")
        __failed_regex = regex_s
        results = None
    return results
def extract(filename_s):
    '''
    Takes the filename of a comic book, and extracts three strings out of it:
    the series name, the issue number, and the issue year.  These three pieces
    of information are returned as a triple, i.e. ("batman", "344", "2004").

    The filename is first cleaned up (directory and extension removed, a
    leading sequence number and a trailing "- title" dropped), and then handed
    to __extract for the actual parsing.  If that parsing fails, or produces a
    blank series name, the cleaned up filename itself is returned as the
    series name, with "" for the issue number and year.

    This function never returns None; the issue number and year may be "" if
    they couldn't be determined.
    '''

    # remove the file extension, unless it's the whole filename
    name_s = Path.GetFileName(filename_s.strip())
    last_period = name_s.rfind(r".")
    name_s = name_s if last_period <= 0 else name_s[0:last_period]

    # see if the comic matches the following format, and if so, remove the
    # leading number (and the separator that follows it):
    # "nnn series name #xx (etc) (etc)" -> "series name #xx (etc) (etc)"
    match = re.match(
        r"^\s*(\d+)[\s._-]+" +     # "nnn"
        r"([^#]+?" +               # "series name"
        r"#-?\d+.*)", name_s)      # "#xx (etc) (etc)"
    if match:
        name_s = match.group(2)

    # see if the comic matches the following format, and if so, remove
    # everything after the issue number that isn't in brackets:
    # "series name #xxx - title (etc) (etc)" -> "series name #xxx (etc) (etc)"
    match = re.match(
        r"^((?:[a-zA-Z,.-]+\s+)+" +        # "series name"
        r"#?(?:\d+[.0-9]*))\s*(?:-)" +     # "#xxx -"
        r".*?((\(.*)?)$", name_s)          # "title (etc) (etc)"
    if match:
        log.debug(name_s)
        name_s = match.group(1) + " " + match.group(2)
        log.debug(" -> ", name_s)

    # try the extraction.  if anything goes wrong, or if we come up with a
    # blank series name, revert to the filename (without extension) as the
    # series name
    try:
        retval = __extract(name_s)
        if retval[0].strip() == "":
            raise Exception("parsed blank series name")
    except:
        log.debug_exc("Recoverable error extracting from '" + name_s + "':")
        retval = name_s, "", ""
    return retval
def persist_string(s, file):
    """
    Writes the given string into a file, using UTF8 encoding.  All other
    contents that were in the given file will be destroyed.

    Returns True on success, False on failure (the failure is logged).
    """
    persisted = False
    try:
        import log
        with StreamWriter(file, False, Encoding.UTF8) as writer:
            writer.Write(s)
        persisted = True
    except:
        log.debug_exc("problem persisting string to file: " + sstr(file))
    return persisted
def load_string(file):
    """
    Reads the entire contents of the given file (as UTF8) and returns them as
    a string.  If the given file doesn't exist, or an error occurs (which is
    logged), this method returns an empty string.
    """
    try:
        if not File.Exists(file):
            return ""
        with StreamReader(file, Encoding.UTF8, False) as reader:
            return reader.ReadToEnd()
    except:
        import log
        log.debug_exc("problem loading string from file " + sstr(file))
        return ""
def _check_magic_file(path_s):
    '''
    ComicVine implementation of the identically named method in the db.py

    Looks for a file called "cvinfo" or "cvinfo.txt" in the given directory
    (or in the directory containing the given file), pulls a numeric series
    key out of its contents, and queries ComicVine for that key.  Returns the
    resulting SeriesRef, or None if any of those steps didn't work out.
    '''
    series_key_s = None
    file_s = None
    try:
        # 1. get the directory to search for a cvinfo file in, or None
        if not path_s:
            dir_s = None
        elif Directory.Exists(path_s):
            dir_s = path_s
        else:
            dir_s = Path.GetDirectoryName(path_s)
        if not (dir_s and Directory.Exists(dir_s)):
            dir_s = None

        if dir_s:
            # 2. search in that directory for a properly named cvinfo file
            #    note that Windows filenames are not case sensitive.  if both
            #    names exist, the one checked last ("cvinfo") is used.
            for name_s in ("cvinfo.txt", "cvinfo"):
                candidate_s = dir_s + "\\" + name_s
                if File.Exists(candidate_s):
                    file_s = candidate_s

            # 3. if we found a file, read its contents in, and parse the
            #    comicvine series id out of it, if possible.
            if file_s:
                with StreamReader(file_s, Encoding.UTF8, False) as sr:
                    line = sr.ReadToEnd()
                    if line:
                        line = line.strip()
                    # the contents are either a bare number, or text that
                    # contains "49-<digits>" or "4050-<digits>" (presumably
                    # a comicvine volume url -- TODO confirm)
                    match = re.match(r"^.*?\b(49|4050)-(\d{2,})\b.*$", line)
                    if match:
                        line = match.group(2)
                    if utils.is_number(line):
                        series_key_s = utils.sstr(int(line))
    except:
        log.debug_exc("bad cvinfo file: " + sstr(file_s))

    # 4. did we find a series key?  if so, query comicvine to build a proper
    #    SeriesRef object for that series key.
    series_ref = None
    if series_key_s:
        try:
            dom = cvconnection._query_series_details_dom(
                __api_key, utils.sstr(series_key_s))
            num_results_n = int(dom.number_of_total_results)
            # anything other than exactly one result is treated as "not found"
            if num_results_n == 1:
                series_ref = __volume_to_seriesref(dom.results)
            else:
                series_ref = None
        except:
            log.debug_exc("error getting SeriesRef for: " + sstr(series_key_s))

    if file_s and not series_ref:
        log.debug("ignoring bad cvinfo file: ", sstr(file_s))

    return series_ref  # may be None!
def extract(filename_s):
    '''
    Takes the filename of a comic book, and extracts three strings out of it:
    the series name, the issue number, and the issue year, which are returned
    as a triple, i.e. ("batman", "344", "2004").

    The filename is cleaned up first (directory and extension removed, a
    leading sequence number and a trailing "- title" dropped) and then passed
    to __extract.  If that fails, or yields a blank series name, the cleaned
    up filename is returned as the series name, with "" for the issue number
    and year.  This function never returns None.
    '''

    # drop the directory, and then the file extension (unless the "extension"
    # is the whole filename)
    name_s = Path.GetFileName(filename_s.strip())
    last_period = name_s.rfind(r".")
    if last_period > 0:
        name_s = name_s[:last_period]

    # if the name starts with a number, drop that number and its separator:
    # "nnn series name #xx (etc) (etc)" -> "series name #xx (etc) (etc)"
    numbered = re.match(
        r"^\s*(\d+)[\s._-]+"     # "nnn"
        r"([^#]+?"               # "series name"
        r"#-?\d+.*)",            # "#xx (etc) (etc)"
        name_s)
    if numbered:
        name_s = numbered.group(2)

    # if a title follows the issue number, drop everything after that number
    # that isn't in brackets:
    # "series name #xxx - title (etc) (etc)" -> "series name #xxx (etc) (etc)"
    titled = re.match(
        r"^((?:[a-zA-Z,.-]+\s+)+"       # "series name"
        r"#?(?:\d+[.0-9]*))\s*(?:-)"    # "#xxx -"
        r".*?((\(.*)?)$",               # "title (etc) (etc)"
        name_s)
    if titled:
        log.debug(name_s)
        name_s = titled.group(1) + " " + titled.group(2)
        log.debug(" -> ", name_s)

    # do the real extraction.  a failure or a blank series name both end up
    # in the handler, which falls back to the cleaned up filename.
    try:
        retval = __extract(name_s)
        if retval[0].strip() == "":
            raise Exception("parsed blank series name")
    except:
        log.debug_exc("Recoverable error extracting from '" + name_s + "':")
        retval = (name_s, "", "")
    return retval
def _query_image(ref, lasttry=False):
    '''
    ComicVine implementation of the identically named method in the db.py

    'ref' may be a SeriesRef or IssueRef (its thumb_url_s is used) or a url
    string.  The image at that url is downloaded and returned.  If the
    download fails it is retried exactly once; 'lasttry' marks that retry.
    Returns None if there is no url, or if both attempts fail.
    '''

    # 1. determine the URL
    if isinstance(ref, SeriesRef) or isinstance(ref, IssueRef):
        image_url_s = ref.thumb_url_s
    elif is_string(ref):
        image_url_s = ref
    else:
        image_url_s = None

    # no url means comicvine has no Image for the given ref object
    if not image_url_s:
        return None

    # 2. attempt to load the image for the URL
    retval = None  # the Image object that we will return
    response = None
    response_stream = None
    try:
        cvconnection.wait_until_ready()  # throttle our request speed
        request = WebRequest.Create(image_url_s)
        request.UserAgent = \
            "[ComicVineScraper, version " + Resources.SCRIPT_VERSION + "]"
        response = request.GetResponse()
        response_stream = response.GetResponseStream()
        retval = Image.FromStream(response_stream)
    except:
        if not lasttry:
            log.debug('RETRY loading image -> ', image_url_s)
            retval = _query_image(ref, True)
        else:
            log.debug_exc('ERROR retry image load failed:')
            retval = None
    finally:
        # release the connection resources whether or not the load worked
        if response:
            response.Dispose()
        if response_stream:
            response_stream.Dispose()

    # if this value is still None, it means an error occurred
    return retval
def _query_image(ref, lasttry=False):
    """
    ComicVine implementation of the identically named method in the db.py

    'ref' may be a SeriesRef or IssueRef (its thumb_url_s is used) or a url
    string.  The image at that url is downloaded and returned.  If the
    download fails it is retried exactly once; 'lasttry' marks that retry.
    Returns None if there is no url, or if both attempts fail.
    """

    # 1. determine the URL
    if isinstance(ref, SeriesRef) or isinstance(ref, IssueRef):
        image_url_s = ref.thumb_url_s
    elif is_string(ref):
        image_url_s = ref
    else:
        image_url_s = None

    # no url means comicvine has no Image for the given ref object
    if not image_url_s:
        return None

    # 2. attempt to load the image for the URL
    retval = None  # the Image object that we will return
    response = None
    response_stream = None
    try:
        request = WebRequest.Create(image_url_s)
        response = request.GetResponse()
        response_stream = response.GetResponseStream()
        retval = Image.FromStream(response_stream)
    except:
        if not lasttry:
            log.debug("RETRY loading image -> ", image_url_s)
            retval = _query_image(ref, True)
        else:
            log.debug_exc("ERROR retry image load failed:")
            retval = None
    finally:
        # release the connection resources whether or not the load worked
        if response:
            response.Dispose()
        if response_stream:
            response_stream.Dispose()

    # if this value is still None, it means an error occurred
    return retval
def __validate_environment():
    '''
    Checks to see if the current environment is valid to run this script in.
    If it is not, a warning is logged and an error message box is displayed
    to explain the problem.

    The check compares the first three dot-separated parts of
    ComicRack.App.ProductVersion against the minimum required version.  If
    the version can't be read or parsed, the problem is logged and the
    environment is treated as valid.

    Returns True if the current environment is valid, False if it is not.
    '''
    # the minimum versions required for a valid environment
    REQUIRED_MAJOR = 0
    REQUIRED_MINOR = 9
    REQUIRED_BUILD = 165
    required = (REQUIRED_MAJOR, REQUIRED_MINOR, REQUIRED_BUILD)

    valid_environment = True
    try:
        version = re.split(r'\.', ComicRack.App.ProductVersion)

        # compare the parts as a tuple of integers.  gluing the build number
        # on as a decimal fraction (i.e. "9.165") misorders builds that have
        # a different number of digits: 0.9.90 would look newer than 0.9.165,
        # and 0.9.1000 would look older.
        current = (int(version[0]), int(version[1]), int(version[2]))
        valid_environment = current >= required

        if not valid_environment:
            log.debug("WARNING: script requires ComicRack ", REQUIRED_MAJOR,
                      '.', REQUIRED_MINOR, '.', REQUIRED_BUILD,
                      ' or higher. Exiting...')
            MessageBox.Show(ComicRack.MainWindow,
                            i18n.get("ComicRackOODText"),
                            i18n.get("ComicRackOODTitle"),
                            MessageBoxButtons.OK, MessageBoxIcon.Warning)
    except Exception:
        # best effort only: never block the script because of a version
        # string that we couldn't make sense of
        log.debug_exc("WARNING: couldn't validate comicrack version")
        valid_environment = True
    return valid_environment
def load_map(file):
    """
    Reads a map out of the given file, which was created with the persist_map
    function.  Every line made up of exactly two ':' separated parts becomes
    an entry; all other lines are ignored.

    All keys in the returned map will be strings, but the values will be
    converted to booleans, floats and integers where possible (and are left
    as strings otherwise).  If the given file doesn't exist or an error
    occurs (which is logged), this method returns an empty map.
    """
    loaded = {}
    try:
        if File.Exists(file):
            with StreamReader(file, Encoding.UTF8, False) as sr:
                while True:
                    line = sr.ReadLine()
                    if line is None:
                        break  # end of file

                    parts = line.strip().split(':')
                    if len(parts) != 2:
                        continue  # not a "key : value" line

                    key = parts[0].strip()
                    value = parts[1].strip()
                    lowered_s = value.lower()
                    if lowered_s == "false":
                        value = False
                    elif lowered_s == "true":
                        value = True
                    else:
                        # numeric if it parses as such, else keep the string
                        try:
                            value = float(value) if '.' in value else int(value)
                        except:
                            pass
                    loaded[key] = value
    except:
        import log
        log.debug_exc("problem loading map from file " + sstr(file))
        loaded = {}
    return loaded