def downloadComic(cengine, comic_url, script_args):
    """ Fetch every chapter of the comic found at comic_url

    cengine     : comic engine module providing the Comic/Chapter/Page classes
    comic_url   : address of the comic's main page
    script_args : parsed command line options; when count_chapters is set,
                  the script exits right after reporting the chapter count

    Returns a dict mapping each chapter URL that had problems to the list
    of failures reported for it by downloadChapter
    """
    feedback.info("Downloading %s"%comic_url)

    comic = cengine.Comic(comic_url)
    all_chapter_urls = comic.getChapterUrls()
    target_dir = comic.getComicLowerName()

    feedback.info(" %i chapters (total)" % len(all_chapter_urls))

    if script_args.count_chapters:
        exit(0)

    problems = {}

    for chapter_url in all_chapter_urls:
        outcome = downloadChapter(cengine, chapter_url, target_dir)

        # 'max' : this chapter is past the upper bound, nothing more to do
        if outcome == 'max':
            break

        # 0 : this chapter is before the lower bound, try the next one
        if outcome == 0:
            continue

        # Anything else is a list of failures (empty when all went well)
        if len(outcome) > 0:
            feedback.warn("Failed %s"%chapter_url)
            problems[chapter_url] = outcome

    return problems
def downloadPage(cengine, page_url, chapter_dir):
    """ Fetch a single page image and store it in chapter_dir

    The stored file is named page_<number>.<extension>, where the page
    number is left-padded with zeroes to four characters.
    """
    feedback.info(" Fetch %s"%abbreviateUrl(page_url) )

    page = cengine.Page(page_url)
    resource = web.WebResource(page.getImageUrl())

    # TODO pre-detect existing pages, don't re-download

    # Page number is resolved before the extension, as the file name is
    # assembled left to right
    padded_number = page.getPageNumber().zfill(4)
    file_name = 'page_' + padded_number + '.' + resource.getExtension()
    target_path = os.path.sep.join([chapter_dir, file_name])

    resource.saveTo(target_path)
def compile(self, remove_dir=False):
    """ Pack the files of self.folder into <self.folder>.cbz

    remove_dir : when True, delete the source directory after the CBZ
                 has been created

    Does nothing when the folder is missing or holds no regular files.
    """
    source_dir = self.folder

    # Nothing to do without a source directory
    if not os.path.isdir(source_dir):
        return

    # Nothing to do when the directory holds no regular files
    holds_files = any(
        os.path.isfile(source_dir + os.path.sep + entry)
        for entry in os.listdir(source_dir)
    )
    if not holds_files:
        return

    feedback.info(" Compiling CBZ for %s" % source_dir)

    # A CBZ is a zip archive with a different extension
    shutil.make_archive(source_dir, "zip", source_dir)
    shutil.move(source_dir + ".zip", source_dir + ".cbz")

    if remove_dir:
        shutil.rmtree(source_dir)
def checkState(args):
    """ Report stored download state when requested, then exit

    args : parsed command line options
        args.failed : print the stored "failed_chapters" entry, if any
        args.last   : print the stored "last" entry, if any

    When either option is set the script exits with status 0 after
    reporting. When neither is set the function returns without doing
    anything.
    """
    if args.failed:
        if dlstate.has("failed_chapters") and dlstate.get("failed_chapters") is not None:
            feedback.warn(str(dlstate.get("failed_chapters") ) )
        else:
            feedback.info("No failures to report.")
        exit(0)

    elif args.last:
        if dlstate.has("last"):
            feedback.info(str(dlstate.get("last") ) )
        else:
            # This branch is about the last completed chapter, not failures
            feedback.info("No last chapter recorded.")
        exit(0)
def downloadChapter(cengine, chapter_url, comic_dir):
    """ Kicks off the page downloads for a chapter

    Checks whether chapter number is within specified bounds

    On completion, if there were no page download errors, attempts CBZ creation

    Returns one of
        0     : chapter skipped (below ch_start, or not newer than the
                stored "last" chapter when ch_start is -1)
        'max' : chapter number is above ch_end
        list  : the page URLs that failed to download (empty if none),
                or a single-item list holding a message when the chapter
                has no page list
    """
    feedback.debug("Start on %s ..."%chapter_url)

    global step_delay
    global ch_start
    global ch_end

    chapter = cengine.Chapter(chapter_url)
    chapter_num = float(chapter.getChapterNumber() )

    if chapter_num < ch_start:
        return 0
    elif chapter_num > ch_end:
        return 'max'

    # IF no start was specified THEN use the last success as base
    # Guarded: with no stored value the comparison would raise TypeError
    if ch_start == -1 and dlstate.has("last"):
        last_done = dlstate.get("last")
        if last_done is not None and chapter_num <= last_done:
            return 0

    feedback.info(" Get %s"%chapter_url)

    page_urls = chapter.getPageUrls()
    if page_urls is None:
        return ['%s not a valid chapter'%chapter_num]

    chapter_dir = os.path.sep.join([comic_dir, chapter.getChapterLowerName()])

    feedback.info(" %i pages"%len(page_urls))

    failed_urls = []
    for url in page_urls:
        try:
            downloadPage(cengine, url, chapter_dir)
        except ComicEngine.ComicError as e:
            feedback.warn("Oops : %s"%str(e) )
            failed_urls.append(url)
        except urllib.error.URLError:
            feedback.warn("Could not download %s"%url)
            failed_urls.append(url)
        except web.DownloadError as e:
            feedback.warn("%i : %s"%(e.code,str(e)) )
            failed_urls.append(url)

        # Pause between page requests
        time.sleep(step_delay)

    if len(failed_urls) == 0:
        feedback.debug(" Compiling to CBZ ...")
        try:
            cbz.CBZArchive(chapter_dir).compile(remove_dir=True)
            dlstate.set("last", chapter_num) # Inequivocable success !
        except Exception as e:
            # Best effort: the failure is logged and "last" is left
            # untouched. The former `errors += 1` here referenced an
            # unassigned local and raised UnboundLocalError instead.
            feedback.warn( str(e) )

    return failed_urls
def __init__(self, experiment_config_name, extra_dict={}, config_roots=[''], getexp=False):
    '''Read experiment config to get basic settings

    The final configuration is built in several passes:

    1. Pre-read the setup config (if found) and the experiment config to
       determine experiment type, environment and option lists.
    2. Start from an empty config and merge, in this order: the process
       environment (section 'DEFAULT'), the library default config, the
       setup config, the experiment type config, one config per option,
       the host environment config, the experiment config itself and
       finally extra_dict.
    3. Write the merged config to a buffer and re-read it (with
       'template' interpolation unless getexp is set), then evaluate
       expression values with eval_key.
    4. Write the result again and initialize self from it without
       interpolation, undoing the dollar masking with uneval_key.

    Parameters:
    experiment_config_name -- path of the experiment config file; its
        base name without extension is used as default experiment id
    extra_dict -- additional settings, merged after the experiment config;
        also consulted for 'EXP_TYPE', 'ENVIRONMENT', 'SETUP_OPTIONS'
        and 'EXP_OPTIONS'
    config_roots -- directories that are tried in order when looking up
        library config files
    getexp -- if true, dollar signs in environment variables are not
        masked, 'VERSIONS_' is not added, and the intermediate re-read
        is done without interpolation

    eval_key and uneval_key raise ExpConfigError when processing a key
    raises InterpolationError or ValueError.

    TODO: probably nicer if default experiment is given as argument
    '''

    # NOTE(review): extra_dict and config_roots have mutable defaults.
    # This method only calls .get() on extra_dict, passes it to merge()
    # and iterates config_roots -- presumably none of these modify the
    # defaults; confirm before adding code that mutates them.

    # State variables
    self.version_info_missing = False

    #
    # Helper functions
    #

    def split_jobs(config):
        '''Post-process job definition to allow for shared configs as [[job1, job2]]

        Each comma-separated name of such a section gets the shared
        settings, merged into an existing job of that name or stored as
        a new one; the combined section is removed afterwards.
        '''
        if 'jobs' in config:
            sep = re.compile(r'\s*,\s*')
            # NOTE(review): entries are added and deleted while iterating;
            # this relies on iteritems() of the 'jobs' section tolerating
            # modification -- TODO confirm against ConfigObj
            for subjobs, subconfig in config['jobs'].iteritems():
                if re.search(sep, subjobs):
                    for subjob in re.split(sep, subjobs):
                        if subjob in config['jobs']:
                            config['jobs'][subjob].merge(subconfig.dict())
                        else:
                            config['jobs'][subjob] = subconfig.dict()
                    del config['jobs'][subjobs]

    def get_config_name(lib_name, base_name):
        '''Cycle through config path until a match is found.

        Return the first existing path built from an entry of
        config_roots, lib_name and base_name.
        Return simple path (lib_name joined with base_name) otherwise.
        '''
        config_name = os.path.join(lib_name, base_name)
        for config_root in config_roots:
            tentative_name = os.path.join(config_root, config_name)
            if os.path.exists(tentative_name):
                config_name = tentative_name
                break
        return config_name

    def read_value(value):
        '''Return stripped contents of the file named by value,
        or the empty string if that path does not exist'''
        if os.path.exists(value):
            stream = open(value)
            result = stream.read().strip()
            stream.close()
        else:
            result = ''
        return result

    def sec2time(seconds):
        '''Create time string (HH:MM:SS) from second of day

        Raise ValueError for values of 86400 or more.
        Only this upper bound is checked.
        '''
        seconds = int(seconds)
        if seconds >= 86400:
            raise ValueError("invalid second of day '{0}'".format(seconds))
        minutes, s = divmod(seconds, 60)
        h, m = divmod(minutes, 60)
        return "{0:02}:{1:02}:{2:02}".format(h, m, s)

    def split_date(value):
        '''Re-format datetime string to list for use in namelists

        Accepts a dash-separated date (year-month-day) or a compact
        date whose last four digits are month and day, each optionally
        followed by 'T' or blank and hour[:minute[:second]].

        Return list of six strings: year, month, day, hour, minute,
        second. Missing time fields are returned as '0'.
        Raise ValueError if value matches neither format.
        '''
        # Dash-separated date; leading zeros are kept out of the groups
        match = re.match(r'^0*(\d+)-0*(\d+)-0*(\d+)'
                         r'([T ]0*(\d+)(:0*(\d+)(:0*(\d+))?)?)?$', value)
        if match:
            # Indices skip the enclosing optional groups of the time part
            return [match.groups('0')[i] for i in [0,1,2,4,6,8]]
        # Compact date; month and day are fixed two-digit groups
        match = re.match(r'^0*(\d+?)(\d{2})(\d{2})'
                         r'([T ]0*(\d+)(:0*(\d+)(:0*(\d+))?)?)?$', value)
        if match:
            return [match.groups('0')[i] for i in [0,1,2,4,6,8]]
        raise ValueError("invalid date/time '{0}'".format(value))

    def add_years(value, years):
        '''Add specified number of years (possible negative) to date

        Return date as string year-month-day; a time part of value
        is not included in the result.
        '''
        years = int(years)
        # Item assignment below requires map() to return a list,
        # as it does in Python 2
        dt = map(int, split_date(value))
        dt[0] += years
        # Signed, zero-padded year; a leading plus sign is removed again
        return "{0:+05}-{1:02}-{2:02}".format(*dt).lstrip('+')

    def add_days(value, days):
        '''Add specified number of days (possible negative) to date

        Return date as string year-month-day; a time part of value
        is not included in the result.
        '''
        def leap(year):
            # Divisible by 4, and not by 100 unless also by 400
            return (not year % 4) and (not (not year % 100) or (not year % 400))
        def monlen(year, mon):
            # Entries 0 and 13 are placeholders of length 0 for the
            # month values just outside of the year
            monlens = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0)
            return monlens[mon] + (mon == 2 and leap(year))
        def add_days_(year, mon, day, days):
            # Move month by month until the remaining days fit
            # into the current month
            while True:
                # Month underflow: continue at end of previous year
                if mon == 0:
                    year -= 1
                    mon = 12
                    day = monlen(year, 12)
                    continue
                # Month overflow: continue at start of next year
                if mon == 13:
                    year += 1
                    mon = 1
                    day = 1
                    continue
                # Target is before this month: go to end of previous month
                if day + days <= 0:
                    days += day
                    mon -= 1
                    day = monlen(year, mon)
                    continue
                # Target is after this month: go to start of next month
                if day + days > monlen(year, mon):
                    days -= monlen(year, mon) - day + 1
                    mon += 1
                    day = 1
                    continue
                day += days
                break
            return (year, mon, day)
        days = int(days)
        dt = map(int, split_date(value))
        dt = add_days_(dt[0], dt[1], dt[2], days)
        return "{0:+05}-{1:02}-{2:02}".format(*dt).lstrip('+')

    def eval_value(value):
        '''
        Evaluate key as python expression,
        return as string or sequence of strings.
        '''
        # NOTE(review): eval() runs arbitrary code taken from config
        # values; config files must come from a trusted source
        result = eval(value)
        if isinstance(result, (list, tuple)):
            result = map(str, result)
        else:
            result = str(result)
        return result

    def eval_value_string(value):
        '''
        Evaluate key as python expression,
        return as string; a sequence result is joined with ', '.
        '''
        result = eval_value(value)
        if isinstance(result, (list, tuple)):
            result = ", ".join(result)
        return result

    def eval_expression(value):
        '''
        Check if value is a supported expression.
        If so, evaluate and return result, otherwise just pass through.

        Supported forms: eval(...), evals(...), add_years(date, n),
        add_days(date, n), split_date(...), sec2time(...), read(...)
        '''
        match = re.match(r'^eval\((.*)\)$', value, re.S)
        if match:
            return eval_value(match.group(1))
        match = re.match(r'^evals\((.*)\)$', value, re.S)
        if match:
            return eval_value_string(match.group(1))
        # Group 2 is the date (with optional time), group 4 the offset
        match = re.match(r'^add_(years|days)\(\s*([-\d]+([T ][\d:]+)?)\s*,\s*([-+]?\d+)\s*\)$', value, re.S)
        if match:
            if match.group(1) == 'days':
                return add_days(match.group(2), match.group(4))
            return add_years(match.group(2), match.group(4))
        match = re.match(r'^split_date\((.*)\)$', value, re.S)
        if match:
            return split_date(match.group(1))
        match = re.match(r'^sec2time\((.*)\)$', value, re.S)
        if match:
            return sec2time(match.group(1))
        match = re.match(r'^read\((.*)\)$', value, re.S)
        if match:
            return read_value(match.group(1))
        return value

    # Interpolate and evaluate keys if they are an expression
    def eval_key(section, key):
        try:
            # Reading the value is presumably where interpolation is
            # applied, hence InterpolationError is handled below
            value = section[key]
            if isinstance(value, (list, tuple)):
                value = map(eval_expression, value)
            elif isinstance(value, basestring):
                value = eval_expression(value)
            # Mask dollar characters in the result; undone by uneval_key
            if isinstance(value, (list, tuple)):
                value = [v.replace('$', '$$') for v in value]
            elif isinstance(value, basestring):
                value = value.replace('$', '$$')
        except (InterpolationError, ValueError) as error:
            raise ExpConfigError(error.message, key)
        section[key] = value

    # Undo remaining changes from walk with eval_key
    def uneval_key(section, key):
        try:
            value = section[key]
            if isinstance(value, (list, tuple)):
                value = [v.replace('$$', '$') for v in value]
            elif isinstance(value, basestring):
                value = value.replace('$$', '$')
        except (InterpolationError, ValueError) as error:
            raise ExpConfigError(error.message, key)
        section[key] = value

    # Move version info from local config to global list
    def register_version(pre_config, config_versions):
        if 'VERSION_' in pre_config:
            config_versions.append(pre_config['VERSION_'])
            del pre_config['VERSION_']
        else:
            # Remembered on the instance, reported once further below
            self.version_info_missing = True

    #
    # Method body
    #

    # Pre-read basic experiment settings

    pre_config = None
    setup_config_name = get_config_name('', ExpConfig.setup_config_name)
    if os.path.exists(setup_config_name):
        pre_config = ConfigObj(setup_config_name, interpolation=False)
    user_config = ConfigObj(experiment_config_name, interpolation=False)
    if pre_config:
        pre_config.merge(user_config)
    else:
        pre_config = user_config

    # The fallback pre_config['EXP_TYPE'] is evaluated in any case,
    # even if extra_dict provides 'EXP_TYPE'
    experiment_type = extra_dict.get('EXP_TYPE', pre_config['EXP_TYPE'])
    # Empty environment should load default
    environment = extra_dict.get('ENVIRONMENT',
                  pre_config.get('ENVIRONMENT',
                  ExpConfig.default_name))
    # Options should always be treated as a list
    setup_options = extra_dict.get('SETUP_OPTIONS',
                    pre_config.get('SETUP_OPTIONS',
                    ''))
    if isinstance(setup_options, basestring):
        if setup_options:
            setup_options = [setup_options]
        else:
            setup_options = []
    exp_options = extra_dict.get('EXP_OPTIONS',
                  pre_config.get('EXP_OPTIONS',
                  ''))
    if isinstance(exp_options, basestring):
        if exp_options:
            exp_options = [exp_options]
        else:
            exp_options = []
    options = setup_options + exp_options
    # Backwards compatibility ENVIRONMENT -> QUEUE_TYPE
    if environment == ExpConfig.default_name and 'QUEUE_TYPE' in pre_config:
        feedback.warning("found obsolete keyword 'QUEUE_TYPE'; "
                         "should be replaced by 'ENVIRONMENT'")
        environment = pre_config['QUEUE_TYPE']
    # Load default if environment was deliberately set to empty
    if not environment:
        environment = ExpConfig.default_name

    # Drop the pre-read configs; only the values extracted above are kept
    pre_config = None
    user_config = None

    # Start from empty configuration

    pre_config = ConfigObj(interpolation=False)
    config_versions = []

    # Get default experiment id from file name
    pre_config[ExpConfig.id_name] = os.path.splitext(
        os.path.basename(experiment_config_name)
    )[0]

    # Read Environment
    env_dict = dict(os.environ)
    if not getexp:
        # Mask literal dollar characters
        for key, value in env_dict.iteritems():
            env_dict[key] = value.replace('$', '$$')
    pre_config.merge({'DEFAULT': {}})
    for key, value in sorted(env_dict.iteritems()):
        pre_config['DEFAULT'][key] = value

    # Read experiment settings from library (default and type specific)
    # The default library config is read without checking for existence
    lib_config_name = get_config_name(ExpConfig.exp_lib_dir,
                                      ExpConfig.default_name+'.config')
    pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
    split_jobs(pre_config)
    register_version(pre_config, config_versions)

    if os.path.exists(setup_config_name):
        pre_config.merge(ConfigObj(setup_config_name, interpolation=False))
        split_jobs(pre_config)
        register_version(pre_config, config_versions)

    lib_config_name = get_config_name(ExpConfig.exp_lib_dir,
                                      experiment_type+'.config')
    if os.path.exists(lib_config_name):
        pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
        split_jobs(pre_config)
        register_version(pre_config, config_versions)
    else:
        feedback.warning("cannot find experiment config for '%s', "+
                         "using default only",
                         experiment_type)

    for option in options:
        lib_config_name = get_config_name(ExpConfig.opt_lib_dir,
                                          option+'.config')
        if os.path.exists(lib_config_name):
            pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
            split_jobs(pre_config)
            register_version(pre_config, config_versions)
        else:
            feedback.warning("cannot find config for option '%s', using "+
                             "default/experiment type only",
                             option)

    # Read host environment settings from library
    # (split_jobs is not applied to this config)
    lib_config_name = get_config_name(ExpConfig.env_lib_dir,
                                      environment+'.config')
    if os.path.exists(lib_config_name):
        pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
        register_version(pre_config, config_versions)

    # Warn user if at least one config had no version info
    if self.version_info_missing:
        feedback.info("version info for standard config is incomplete")

    # Re-read config to allow overriding default settings
    # TODO: probably nicer if default experiment is given as argument
    experiment_config = ConfigObj(experiment_config_name,
                                  interpolation=False)
    pre_config.merge(experiment_config)
    split_jobs(pre_config)

    # Add extra dictionary
    pre_config.merge(extra_dict)

    # Backwards compatibility ENVIRONMENT -> QUEUE_TYPE
    pre_config['ENVIRONMENT'] = environment

    # Add complete versioning info
    if not getexp:
        pre_config['VERSIONS_'] = config_versions

    # Re-read merged config with interpolation set.
    # This works around incomprehensible inheritance of interpolation with
    # merge. Make sure that all values are interpolated

    config_lines = StringIO.StringIO()

    pre_config.write(config_lines)
    pre_config = None

    config_lines.seek(0)
    pre_config = ConfigObj(config_lines,
                           interpolation=False if getexp else 'template')

    # Extract experiment description from initial comment
    # if not set explicitly
    if not pre_config.has_key('EXP_DESCRIPTION'):
        is_empty = lambda s: re.match(r'^[\s#]*$', s)
        rm_comment = lambda s: re.sub(r'^\s*# ?', '', s)
        # Remove comment markers, then drop empty lines at the beginning
        # and (by way of the two reversals) at the end
        pre_config['EXP_DESCRIPTION'] = "\n".join(
            reversed(list(
                dropwhile(is_empty,
                    reversed(list(
                        dropwhile(is_empty,
                            map(rm_comment,
                                experiment_config.initial_comment)
                        )
                    ))
                )
            ))
        )

    pre_config.walk(eval_key)

    # Re-read final config without interpolation.
    # This allows copying data without evaluation of version keywords.

    # Re-use the buffer for the evaluated config
    config_lines.seek(0)
    config_lines.truncate()

    pre_config.write(config_lines)
    pre_config = None

    config_lines.seek(0)
    ConfigObj.__init__(self, config_lines, interpolation=False)
    self.walk(uneval_key)

    self.experiment_id = self[ExpConfig.id_name]
    # Experiment kind is the experiment type without a trailing
    # '-<word>' suffix
    self.experiment_kind = re.sub(r'-\w+$', '', experiment_type)