Exemple #1
0
def downloadComic(cengine, comic_url, script_args):
    """ Download every chapter of the comic found at comic_url

    When script_args.count_chapters is set, only the chapter count is
    reported and the process exits with status 0

    Returns a dict mapping each failed chapter URL to the failure entries
    returned by downloadChapter for that chapter
    """
    feedback.info("Downloading %s"%comic_url)

    comic        = cengine.Comic(comic_url)
    all_chapters = comic.getChapterUrls()
    target_dir   = comic.getComicLowerName()

    feedback.info("  %i chapters (total)" % len(all_chapters))

    if script_args.count_chapters:
        exit(0)

    failures = {}
    for chapter_url in all_chapters:
        outcome = downloadChapter(cengine, chapter_url, target_dir)

        # 'max' : this chapter is past the upper bound, stop iterating
        if outcome == 'max':
            break

        # 0 : chapter was skipped, lower bound not reached yet
        if outcome == 0:
            continue

        # Anything else is a list of failures; an empty one means success
        if len(outcome) > 0:
            feedback.warn("Failed %s"%chapter_url)
            failures[chapter_url] = outcome

    return failures
Exemple #2
0
def downloadPage(cengine, page_url, chapter_dir):
    """ Fetch the image of a single page and save it under chapter_dir

    The file is named page_<number>.<extension>, with the page number
    zero-padded to four characters
    """
    feedback.info("    Fetch %s"%abbreviateUrl(page_url) )

    page     = cengine.Page(page_url)
    resource = web.WebResource(page.getImageUrl())

    # TODO pre-detect existing pages, don't re-download
    padded_number = page.getPageNumber().zfill(4)
    file_name     = 'page_' + padded_number + '.' + resource.getExtension()
    destination   = os.path.sep.join( (chapter_dir, file_name) )

    resource.saveTo(destination)
Exemple #3
0
    def compile(self, remove_dir=False):
        """ Build <folder>.cbz from the contents of the target folder

        Does nothing when the target folder is missing or holds no regular
        file directly inside it.

        remove_dir : whether to remove the source directory once the CBZ is created
        """
        source = self.folder

        # Guard 1 : the target folder must exist
        if not os.path.isdir(source):
            return

        # Guard 2 : at least one regular file must sit directly in the folder
        holds_a_file = any(
            os.path.isfile(source + os.path.sep + entry)
            for entry in os.listdir(source)
        )
        if not holds_a_file:
            return

        feedback.info("  Compiling CBZ for %s" % source)

        # A CBZ is a ZIP archive under another extension : zip, then rename
        shutil.make_archive(source, "zip", source)
        shutil.move(source + ".zip", source + ".cbz")

        if remove_dir:
            shutil.rmtree(source)
Exemple #4
0
def checkState(args):
    """ Report saved download state on request, then exit

    args.failed : print the value stored under "failed_chapters" (if any)
    args.last   : print the value stored under "last" (if any)

    In both cases the process exits with status 0 afterwards.
    Returns None without doing anything when neither flag is set.
    """
    if args.failed:
        if dlstate.has("failed_chapters") and dlstate.get("failed_chapters") is not None:
            feedback.warn(str(dlstate.get("failed_chapters") ) )
        else:
            feedback.info("No failures to report.")
        exit(0)
    elif args.last:
        if dlstate.has("last"):
            feedback.info(str(dlstate.get("last") ) )
        else:
            # This branch is about the last chapter, not about failures
            feedback.info("No last chapter recorded.")
        exit(0)
Exemple #5
0
def downloadChapter(cengine, chapter_url, comic_dir):
    """ Kicks off the page downloads for a chapter

    Checks whether chapter number is within specified bounds

    On completion, if there were no page download errors, attempts CBZ creation

    Reads the module globals step_delay (pause after each page, in seconds),
    ch_start and ch_end (chapter number bounds)

    Returns one of:
        0     : chapter skipped (below ch_start, or not newer than the saved
                "last" chapter when ch_start is -1)
        'max' : chapter number is above ch_end
        list  : failure entries for this chapter (page URLs or messages),
                empty when everything succeeded
    """
    feedback.debug("Start on %s ..."%chapter_url)

    global step_delay
    global ch_start
    global ch_end

    chapter     = cengine.Chapter(chapter_url)
    chapter_num = float(chapter.getChapterNumber() )

    if chapter_num < ch_start:
        return 0
    elif chapter_num > ch_end:
        return 'max'

    # IF no start was specified THEN use the last success as base
    if ch_start == -1:
        # No saved "last" chapter (first run) means nothing can be skipped;
        # comparing a float against a missing value would raise TypeError
        last_done = dlstate.get("last") if dlstate.has("last") else None
        if last_done is not None and chapter_num <= last_done:
            return 0

    feedback.info("  Get %s"%chapter_url)

    page_urls   = chapter.getPageUrls()
    if page_urls is None:
        return ['%s not a valid chapter'%chapter_num]

    chapter_dir = os.path.sep.join([comic_dir, chapter.getChapterLowerName()])

    feedback.info("    %i pages"%len(page_urls))

    failed_urls = []
    for url in page_urls:
        try:
            downloadPage(cengine, url, chapter_dir)
        except ComicEngine.ComicError as e:
            feedback.warn("Oops : %s"%str(e) )
            failed_urls.append(url)
        except urllib.error.URLError as e:
            feedback.warn("Could not download %s"%url)
            failed_urls.append(url)
        except web.DownloadError as e:
            feedback.warn("%i : %s"%(e.code,str(e)) )
            failed_urls.append(url)

        # Pause after every page, successful or not
        time.sleep(step_delay)

    if len(failed_urls) == 0:
        feedback.debug("  Compiling to CBZ ...")
        try:
            cbz.CBZArchive(chapter_dir).compile(remove_dir=True)
            dlstate.set("last", chapter_num) # Inequivocable success !
        except Exception as e:
            feedback.warn( str(e) )
            # Report the chapter as failed to the caller; the previous
            # "errors += 1" referenced an undefined local and crashed here
            failed_urls.append('CBZ creation failed for %s : %s'%(chapter_dir, str(e)) )

    return failed_urls
Exemple #6
0
    def __init__(self, experiment_config_name,
                 extra_dict={}, config_roots=[''], getexp=False):
        '''Read experiment config to get basic settings

        Builds the final configuration by merging, in this order: the
        process environment (stored in section 'DEFAULT'), the library
        default config, the setup config (if the file exists), the
        experiment type config, one config per option, the host environment
        config, the experiment config file itself, and extra_dict.
        The merged result is then re-read, its keys are evaluated (see
        eval_expression below), and it is loaded into this object with
        interpolation switched off.

        experiment_config_name : file name of the experiment config; its
            base name without extension becomes the default experiment id
        extra_dict : additional settings, merged after all config files;
            also consulted for 'EXP_TYPE', 'ENVIRONMENT', 'SETUP_OPTIONS'
            and 'EXP_OPTIONS'
        config_roots : directories searched in order for library configs
        getexp : if true, dollar signs in environment values are not
            masked, 'VERSIONS_' is not stored, and the merged config is
            re-read without interpolation

        Raises ExpConfigError when evaluating a key fails with an
        InterpolationError or ValueError.

        NOTE(review): extra_dict and config_roots are mutable default
        arguments; this method only reads them directly, but whether
        merge() leaves its argument untouched should be confirmed.
        NOTE(review): relies on Python 2 APIs (iteritems, basestring,
        has_key, StringIO.StringIO, map() returning a list).

        TODO: probably nicer if default experiment is given as argument
        '''

        # State variables
        self.version_info_missing = False

        #
        # Helper functions
        #

        def split_jobs(config):
            '''Post-process job definition to allow for shared configs as [[job1, job2]]'''
            if 'jobs' in config:
                sep = re.compile(r'\s*,\s*')
                # NOTE(review): entries of config['jobs'] are added and
                # deleted while iterating over it; confirm this is safe for
                # the section type in use
                for subjobs, subconfig in config['jobs'].iteritems():
                    if re.search(sep, subjobs):
                        # Section name is a comma separated list: give every
                        # listed job its own copy of (or merge into) the
                        # shared settings, then drop the combined section
                        for subjob in re.split(sep, subjobs):
                            if subjob in config['jobs']:
                                config['jobs'][subjob].merge(subconfig.dict())
                            else:
                                config['jobs'][subjob] = subconfig.dict()
                        del config['jobs'][subjobs]

        def get_config_name(lib_name, base_name):
            '''Cycle through config path until a match is found.

               Return simple path otherwise'''
            config_name = os.path.join(lib_name, base_name)
            # First root (from the enclosing config_roots) containing the
            # file wins
            for config_root in config_roots:
                tentative_name = os.path.join(config_root, config_name)
                if os.path.exists(tentative_name):
                    config_name = tentative_name
                    break
            return config_name

        def read_value(value):
            '''Return stripped contents of file 'value', or '' if it does not exist'''
            if os.path.exists(value):
                stream = open(value)
                result = stream.read().strip()
                stream.close()
            else:
                result = ''
            return result

        def sec2time(seconds):
            '''Create time string (HH:MM:SS) from second of day

               Raises ValueError for values of 86400 or more'''
            seconds = int(seconds)
            if seconds >= 86400:
                raise ValueError("invalid second of day '{0}'".format(seconds))
            minutes, s = divmod(seconds, 60)
            h, m = divmod(minutes, 60)
            return "{0:02}:{1:02}:{2:02}".format(h, m, s)

        def split_date(value):
            '''Re-format datetime string to list for use in namelists

               Returns six strings: year, month, day, hour, minute, second.
               Missing time parts are returned as '0'.
               Raises ValueError if value matches neither supported format'''
            # Dash separated date with optional time part; leading zeros are
            # stripped by the pattern
            match = re.match(r'^0*(\d+)-0*(\d+)-0*(\d+)'
                             r'([T ]0*(\d+)(:0*(\d+)(:0*(\d+))?)?)?$', value)
            if match:
                # Indices 3, 5 and 7 are the enclosing optional groups and
                # are skipped
                return [match.groups('0')[i] for i in [0,1,2,4,6,8]]

            # Compact date: the last four digits are month and day, the rest
            # is the year
            match = re.match(r'^0*(\d+?)(\d{2})(\d{2})'
                             r'([T ]0*(\d+)(:0*(\d+)(:0*(\d+))?)?)?$', value)
            if match:
                return [match.groups('0')[i] for i in [0,1,2,4,6,8]]

            raise ValueError("invalid date/time '{0}'".format(value))

        def add_years(value, years):
            '''Add specified number of years (possible negative) to date'''
            years = int(years)
            # Item assignment below needs map() to return a list (Python 2)
            dt = map(int, split_date(value))
            dt[0] += years
            # Only year, month and day are formatted; a '+' sign of the year
            # is removed, a '-' sign is kept
            return "{0:+05}-{1:02}-{2:02}".format(*dt).lstrip('+')

        def add_days(value, days):
            '''Add specified number of days (possible negative) to date'''
            def leap(year):
                # Divisible by 4, and either not by 100 or by 400
                return (not year % 4) and (not (not year % 100) or (not year % 400)) 
            def monlen(year, mon):
                # Index 0 and 13 are placeholders for the months before
                # January and after December
                monlens = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0)
                return monlens[mon] + (mon == 2 and leap(year))
            def add_days_(year, mon, day, days):
                # Move month by month until the remaining offset fits into
                # the current month
                while True:
                    if mon == 0:
                        # Stepped back before January: last day of December
                        # of the previous year
                        year -= 1
                        mon = 12
                        day = monlen(year, 12)
                        continue
                    if mon == 13:
                        # Stepped past December: first day of next year
                        year += 1
                        mon = 1
                        day = 1
                        continue
                    if day + days <= 0:
                        # Target lies before this month: go to the last day
                        # of the previous month
                        days += day
                        mon -= 1
                        day = monlen(year, mon)
                        continue
                    if day + days > monlen(year, mon):
                        # Target lies after this month: go to the first day
                        # of the next month
                        days -= monlen(year, mon) - day + 1
                        mon += 1
                        day = 1
                        continue
                    day += days
                    break

                return (year, mon, day)

            days = int(days)
            dt = map(int, split_date(value))
            dt = add_days_(dt[0], dt[1], dt[2], days)
            return "{0:+05}-{1:02}-{2:02}".format(*dt).lstrip('+')

        def eval_value(value):
            '''
                Evaluate key as python expression,
                return as string or sequence of strings.
            '''
            # NOTE(review): eval() runs arbitrary Python code taken from the
            # config value; only use with trusted config files
            result = eval(value)
            if isinstance(result, (list, tuple)):
                result = map(str, result)
            else:
                result = str(result)
            return result

        def eval_value_string(value):
            '''
                Evaluate key as python expression,
                return as string; sequences are joined with ', '.
            '''
            result = eval_value(value)
            if isinstance(result, (list, tuple)):
                result = ", ".join(result)
            return result

        def eval_expression(value):
            '''
                Check if value is a supported expression.
                If so, evaluate and return result, otherwise just pass through.

                Supported forms: eval(...), evals(...), add_years(date, n),
                add_days(date, n), split_date(...), sec2time(...), read(...)
            '''
            match = re.match(r'^eval\((.*)\)$', value, re.S)
            if match:
                return eval_value(match.group(1))

            match = re.match(r'^evals\((.*)\)$', value, re.S)
            if match:
                return eval_value_string(match.group(1))

            # group(2) is the date (with optional time), group(4) the signed
            # number of years or days
            match = re.match(r'^add_(years|days)\(\s*([-\d]+([T ][\d:]+)?)\s*,\s*([-+]?\d+)\s*\)$', value, re.S)
            if match:
                if match.group(1) == 'days':
                    return add_days(match.group(2), match.group(4))
                return add_years(match.group(2), match.group(4))

            match = re.match(r'^split_date\((.*)\)$', value, re.S)
            if match:
                return split_date(match.group(1))

            match = re.match(r'^sec2time\((.*)\)$', value, re.S)
            if match:
                return sec2time(match.group(1))

            match = re.match(r'^read\((.*)\)$', value, re.S)
            if match:
                return read_value(match.group(1))

            return value

        # Interpolate and evaluate keys if they are an expression
        def eval_key(section, key):
            try:
                value = section[key]
                if isinstance(value, (list, tuple)):
                    value = map(eval_expression, value)
                elif isinstance(value, basestring):
                    value = eval_expression(value)
                # Double all dollar signs in the result; uneval_key reverts
                # this after the final re-read
                if isinstance(value, (list, tuple)):
                    value = [v.replace('$', '$$') for v in value]
                elif isinstance(value, basestring):
                    value = value.replace('$', '$$')
            except (InterpolationError, ValueError) as error:
                # error.message is the Python 2 exception message attribute
                raise ExpConfigError(error.message, key)
            section[key] = value

        # Undo remaining changes from walk with eval_key
        def uneval_key(section, key):
            try:
                value = section[key]
                if isinstance(value, (list, tuple)):
                    value = [v.replace('$$', '$') for v in value]
                elif isinstance(value, basestring):
                    value = value.replace('$$', '$')
            except (InterpolationError, ValueError) as error:
                raise ExpConfigError(error.message, key)
            section[key] = value

        # Move version info from local config to global list
        def register_version(pre_config, config_versions):
            if 'VERSION_' in pre_config:
                config_versions.append(pre_config['VERSION_'])
                del pre_config['VERSION_']
            else:
                # Remembered on the instance; reported once further below
                self.version_info_missing = True

        #
        # Method body
        #

        # Pre-read basic experiment settings

        pre_config = None
        setup_config_name = get_config_name('', ExpConfig.setup_config_name)
        if os.path.exists(setup_config_name):
            pre_config = ConfigObj(setup_config_name, interpolation=False)
        user_config = ConfigObj(experiment_config_name, interpolation=False)
        if pre_config:
            pre_config.merge(user_config)
        else:
            pre_config = user_config

        # NOTE: pre_config['EXP_TYPE'] is evaluated before get() is called,
        # so 'EXP_TYPE' must be present in the config files even when
        # extra_dict provides it
        experiment_type = extra_dict.get('EXP_TYPE', pre_config['EXP_TYPE'])
        # Empty environment should load default
        environment = extra_dict.get('ENVIRONMENT', 
                      pre_config.get('ENVIRONMENT',
                      ExpConfig.default_name))
        # Options should always be treated as a list
        setup_options = extra_dict.get('SETUP_OPTIONS',
                        pre_config.get('SETUP_OPTIONS',
                        ''))
        if isinstance(setup_options, basestring):
            if setup_options:
                setup_options = [setup_options]
            else:
                setup_options = []
        exp_options = extra_dict.get('EXP_OPTIONS',
                      pre_config.get('EXP_OPTIONS',
                      ''))
        if isinstance(exp_options, basestring):
            if exp_options:
                exp_options = [exp_options]
            else:
                exp_options = []
        # Setup options are applied before experiment options
        options = setup_options + exp_options
        # Backwards compatibility ENVIRONMENT -> QUEUE_TYPE
        if environment == ExpConfig.default_name and 'QUEUE_TYPE' in pre_config:
            feedback.warning("found obsolete keyword 'QUEUE_TYPE'; "
                             "should be replaced by 'ENVIRONMENT'")
            environment = pre_config['QUEUE_TYPE']
        # Load default if environment was deliberately set to empty
        if not environment:
            environment = ExpConfig.default_name

        # Discard the pre-read configs; only the values extracted above are
        # kept
        pre_config = None
        user_config = None

        # Start from empty configuration

        pre_config = ConfigObj(interpolation=False)
        config_versions = []

        # Get default experiment id from file name
        pre_config[ExpConfig.id_name] = os.path.splitext(
            os.path.basename(experiment_config_name)
        )[0]

        # Read Environment

        env_dict = dict(os.environ)
        if not getexp:
            # Mask literal dollar characters
            for key, value in env_dict.iteritems():
                env_dict[key] = value.replace('$', '$$')
        pre_config.merge({'DEFAULT': {}})
        for key, value in sorted(env_dict.iteritems()):
            pre_config['DEFAULT'][key] = value

        # Read experiment settings from library (default and type specific)

        lib_config_name = get_config_name(ExpConfig.exp_lib_dir,
                                          ExpConfig.default_name+'.config')
        pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
        split_jobs(pre_config)
        register_version(pre_config, config_versions)

        if os.path.exists(setup_config_name):
            pre_config.merge(ConfigObj(setup_config_name, interpolation=False))
            split_jobs(pre_config)
            register_version(pre_config, config_versions)

        lib_config_name = get_config_name(ExpConfig.exp_lib_dir, 
                                          experiment_type+'.config')
        if os.path.exists(lib_config_name):
            pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
            split_jobs(pre_config)
            register_version(pre_config, config_versions)
        else:
            feedback.warning("cannot find experiment config for '%s', "+
                             "using default only", experiment_type)

        for option in options:
            lib_config_name = get_config_name(ExpConfig.opt_lib_dir, 
                                              option+'.config')
            if os.path.exists(lib_config_name):
                pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
                split_jobs(pre_config)
                register_version(pre_config, config_versions)
            else:
                feedback.warning("cannot find config for option '%s', using "+
                                 "default/experiment type only", option)

        # Read host environment settings from library

        lib_config_name = get_config_name(ExpConfig.env_lib_dir,
                                          environment+'.config')

        # A missing environment config is skipped without a warning, and
        # split_jobs is not applied to it
        if os.path.exists(lib_config_name):
            pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
            register_version(pre_config, config_versions)

        # Warn user if at least one config had no version info
        if self.version_info_missing:
            feedback.info("version info for standard config is incomplete")

        # Re-read config to allow overriding default settings
        # TODO: probably nicer if default experiment is given as argument
        experiment_config = ConfigObj(experiment_config_name,
                                      interpolation=False)
        pre_config.merge(experiment_config)
        split_jobs(pre_config)

        # Add extra dictionary
        pre_config.merge(extra_dict)

        # Backwards compatibility ENVIRONMENT -> QUEUE_TYPE
        pre_config['ENVIRONMENT'] = environment

        # Add complete versioning info
        if not getexp:
            pre_config['VERSIONS_'] = config_versions

        # Re-read merged config with interpolation set.
        # This works around incomprehensible inheritance of interpolation with
        # merge. Make sure that all values are interpolated

        # In-memory buffer used to write the config out and read it back
        config_lines = StringIO.StringIO()

        pre_config.write(config_lines)
        pre_config = None

        config_lines.seek(0)
        pre_config = ConfigObj(config_lines,
                               interpolation=False if getexp else 'template')

        # Extract experiment description from initial comment
        # if not set explicitly
        if not pre_config.has_key('EXP_DESCRIPTION'):
            # is_empty: line holds nothing but white space and '#'
            # rm_comment: strip the leading comment marker and one blank
            is_empty = lambda s: re.match(r'^[\s#]*$', s)
            rm_comment = lambda s: re.sub(r'^\s*# ?', '', s)       
            # Inner dropwhile removes empty lines at the start, the outer
            # one (working on the reversed list) those at the end
            pre_config['EXP_DESCRIPTION'] = "\n".join(
                reversed(list(
                    dropwhile(is_empty,
                        reversed(list(
                            dropwhile(is_empty,
                                map(rm_comment,
                                    experiment_config.initial_comment)
                            )
                        )) 
                    )
                ))
            )

        pre_config.walk(eval_key)

        # Re-read final config without interpolation.
        # This allows copying data without evaluation of version keywords.

        # Empty the buffer before it is reused
        config_lines.seek(0)
        config_lines.truncate()

        pre_config.write(config_lines)
        pre_config = None

        config_lines.seek(0)
        ConfigObj.__init__(self, config_lines, interpolation=False)
        self.walk(uneval_key)
        
        self.experiment_id = self[ExpConfig.id_name]
        # Experiment type with a trailing '-<word>' suffix removed
        self.experiment_kind = re.sub(r'-\w+$', '', experiment_type)