예제 #1
0
 def get_wiki_config(self, wikiname):
     '''
     Build and return the configuration for one particular wiki:
     read the shared config file, then overlay the per-project
     settings for the given wiki.
     '''
     per_wiki_conf = Config(self.configfile)
     per_wiki_conf.parse_conffile_per_project(wikiname)
     return per_wiki_conf
예제 #2
0
    def __init__(self, actions, show, message, job_status, undo, configfile,
                 wikiname, dryrun, verbose):
        '''
        Constructor; loads the configuration for every wiki to be
        handled. That may be wasteful, but even with 1k wikis it is
        cheap enough.
        '''
        self.verbose = verbose
        # nothing at all requested: bail out early, leaving the rest
        # of the attributes unset on purpose
        if not actions and not undo:
            if self.verbose:
                sys.stderr.write("No actions specified.\n")
            return

        self.actions = actions
        self.show = show
        self.message = message
        self.job_status = job_status
        self.undo = undo
        self.configfile = configfile
        self.wikiname = wikiname
        self.dryrun = dryrun
        self.conf = Config(self.configfile)

        # a single named wiki restricts the worklist; otherwise we
        # operate on every known wiki
        if self.wikiname is not None:
            self.wikilist = [self.wikiname]
        else:
            self.wikilist = self.conf.db_list

        self.wikiconfs = dict(
            (wiki, self.get_wiki_config(wiki)) for wiki in self.wikilist)
예제 #3
0
    def parse_conffile(self):
        '''
        grab values from configuration and assign them to appropriate variables
        '''
        # wiki section: the mediawiki install dir plus the various db lists
        self.wiki_dir = self.conf.get("wiki", "mediawiki")
        self.all_wikis_list = MiscUtils.db_list(self.conf.get("wiki", "allwikislist"))
        self.private_wikis_list = MiscUtils.db_list(self.conf.get("wiki", "privatewikislist"))
        self.closed_wikis_list = MiscUtils.db_list(self.conf.get("wiki", "closedwikislist"))
        self.skip_wikis_list = MiscUtils.db_list(self.conf.get("wiki", "skipwikislist"))

        # make sure the section exists so that a later get() fails on a
        # missing option rather than on a missing section
        if not self.conf.has_section('output'):
            self.conf.add_section('output')
        self.dump_dir = self.conf.get("output", "dumpdir")
        self.temp_dir = self.conf.get("output", "temp")
        self.indextmpl = self.conf.get("output", "indextmpl")
        self.template_dir = self.conf.get("output", "templatedir")
        self.webroot = self.conf.get("output", "webroot")
        fileperms = self.conf.get("output", "fileperms")
        # base 0: the string's own prefix selects the base, so octal
        # file modes in the config file are converted correctly
        self.fileperms = int(fileperms, 0)
        lock_stale = self.conf.get("output", "lockstale")
        self.lock_stale = int(lock_stale, 0)
        if not self.conf.has_section('tools'):
            self.conf.add_section('tools')
        # paths to external binaries and helper scripts
        self.php = self.conf.get("tools", "php")
        self.gzip = self.conf.get("tools", "gzip")
        self.bzip2 = self.conf.get("tools", "bzip2")
        self.mysql = self.conf.get("tools", "mysql")
        self.checkforbz2footer = self.conf.get("tools", "checkforbz2footer")
        self.multiversion = self.conf.get("tools", "multiversion")
        self.adminsettings = self.conf.get("tools", "adminsettings")

        if not self.conf.has_section('cleanup'):
            self.conf.add_section('cleanup')
        # how many old dump runs to keep around
        self.keep = self.conf.getint("cleanup", "keep")

        # db credentials are optional in the config file
        self.db_user = None
        self.db_password = None
        if not self.conf.has_section('database'):
            self.conf.add_section('database')
        if self.conf.has_option('database', 'user'):
            self.db_user = self.conf.get("database", "user")
        if self.conf.has_option('database', 'password'):
            self.db_password = self.conf.get("database", "password")
        # get from MW adminsettings file if not set in conf file
        if not self.db_user:
            self.db_user, self.db_password = Config.get_db_user_and_password(
                self.conf, self.wiki_dir)
        self.max_allowed_packet = self.conf.get("database", "max_allowed_packet")
예제 #4
0
def main():
    '''
    main entry point, does all the work: parse and validate command
    line options, read the wiki configuration, run the logs backup
    '''
    wiki = None
    output_file = None
    start = None
    end = None
    configfile = "wikidump.conf"
    dryrun = False

    try:
        # 'd' added to the short options so that -d (dryrun) is
        # actually accepted; previously only --dryrun worked and a
        # bare -d raised GetoptError
        (options, remainder) = getopt.gnu_getopt(
            sys.argv[1:], "w:o:s:e:C:dfhv",
            ["wiki=", "outfile=",
             "start=", "end=", "config=",
             "help", "dryrun"])

    except getopt.GetoptError as err:
        usage("Unknown option specified: " + str(err))
    for (opt, val) in options:
        if opt in ["-w", "--wiki"]:
            wiki = val
        elif opt in ["-o", "--outfile"]:
            output_file = val
        elif opt in ["-s", "--start"]:
            start = val
        elif opt in ["-e", "--end"]:
            end = val
        elif opt in ["-C", "--config"]:
            configfile = val
        elif opt in ["-d", "--dryrun"]:
            dryrun = True
        elif opt in ["-h", "--help"]:
            usage('Help for this script\n')
        else:
            usage("Unknown option specified: <%s>" % opt)

    if len(remainder) > 0:
        usage("Unknown option(s) specified: <%s>" % remainder[0])

    # both of these options are required
    if wiki is None:
        usage("mandatory argument missing: --wiki")
    if output_file is None:
        usage("mandatory argument missing: --outfile")

    if start is not None:
        if not start.isdigit():
            usage("value for --start must be a number")
        else:
            start = int(start)

    if end is not None:
        if not end.isdigit():
            usage("value for --end must be a number")
        else:
            # NOTE(review): end is decremented, apparently turning an
            # exclusive bound into an inclusive one — confirm against
            # dologsbackup's expectations
            end = int(end) - 1

    if not os.path.exists(configfile):
        usage("no such file found: " + configfile)

    wikiconf = Config(configfile)
    wikiconf.parse_conffile_per_project(wiki)
    dologsbackup(wiki, output_file, wikiconf, start, end, dryrun)
예제 #5
0
class ActionHandler(object):
    '''
    methods for all actions, whether on one wiki or on all
    '''

    def __init__(self, actions, show, message, job_status, undo, configfile,
                 wikiname, dryrun, verbose):
        '''
        constructor.
        reads configs for every wiki, this might be wasteful
        but really how long can it take? even with 1k wikis
        '''
        self.verbose = verbose
        # nothing requested: return early; note that in this case the
        # remaining attributes are deliberately left unset
        if not actions and not undo:
            if self.verbose:
                sys.stderr.write("No actions specified.\n")
            return

        self.actions = actions
        self.undo = undo
        self.dryrun = dryrun
        self.wikiname = wikiname
        self.configfile = configfile
        self.message = message
        self.show = show
        self.job_status = job_status
        self.conf = Config(self.configfile)

        # a single named wiki restricts the worklist; otherwise we
        # operate on every wiki known to the configuration
        if self.wikiname is None:
            self.wikilist = self.conf.db_list
        else:
            self.wikilist = [self.wikiname]

        # per-wiki parsed configuration, keyed by wiki name
        self.wikiconfs = {}
        for wiki in self.wikilist:
            self.wikiconfs[wiki] = self.get_wiki_config(wiki)

    def get_wiki_config(self, wikiname):
        '''
        parse and return the configuration for a particular wiki

        reads the shared config file and then applies the
        per-project overrides for the given wiki name
        '''
        wikiconf = Config(self.configfile)
        wikiconf.parse_conffile_per_project(wikiname)
        return wikiconf

    def do_all(self):
        '''
        Run every action and undo-action requested at instantiation
        time: global actions first, then the per-wiki ones.
        '''
        self.conf.parse_conffile_globally()
        # order matters: each do step runs before its matching undo
        steps = (self.do_global_actions, self.undo_global_actions,
                 self.do_per_wiki_actions, self.undo_per_wiki_actions)
        for step in steps:
            step()

    def do_global_actions(self):
        '''
        Perform the requested actions that either do not reference a
        particular wiki (maintenance, exit) or that may run on one or
        all wikis.
        '''
        # dispatch table instead of a long if/elif chain
        handlers = {
            "kill": self.do_kill,
            "unlock": self.do_unlock,
            "remove": self.do_remove,
            "rerun": self.do_rerun,
            "maintenance": self.do_maintenance,
            "exit": self.do_exit,
            "show": self.do_show,
        }
        for item in self.actions:
            if item in handlers:
                handlers[item]()

    def do_per_wiki_actions(self):
        '''
        Perform the requested actions that must reference one
        specific wiki.
        '''
        for item in self.actions:
            if item == "mark":
                self.do_mark(self.wikiname)
            elif item == "notice":
                # notices go onto every wiki in the worklist
                for wiki in self.wikiconfs:
                    self.do_notice(wiki)

    def undo_global_actions(self):
        '''
        Undo the requested actions that do not reference a particular
        wiki.
        '''
        undoers = {"maintenance": self.undo_maintenance,
                   "exit": self.undo_exit}
        for item in self.undo:
            if item in undoers:
                undoers[item]()

    def undo_per_wiki_actions(self):
        '''
        Undo the requested actions that must reference a particular
        wiki; only the notice file can be undone per wiki.
        '''
        for wiki in self.wikiconfs:
            for item in self.undo:
                if item != "notice":
                    continue
                self.undo_notice(wiki)

    def get_dump_pids(self):
        '''
        get list of pids either for one wiki or for all
        which are running dumps; these must have been started by
        either the scheduler, the bash wrapper or the worker.py
        script.  i.e. if a user runs dumpBackups.php by hand
        that is not going to be picked up.

        don't rely on lock files, they may have been removed or not created
        look up processes with DUMPS environ var set. values:
           'scheduler' (the dumps scheduler)
           'wrapper' (the bash dumps wrapper that runs across all wikis
           pid (the worker that runs on one wiki and any processes it spawned)
        we want at all costs to avoid hardcoded list of commands
        '''
        pids = []
        uid = os.geteuid()
        # numeric entries under /proc are process ids
        for process_id in os.listdir('/proc'):
            if process_id.isdigit():
                # owned by us
                puid = os.stat(os.path.join('/proc', process_id)).st_uid
                if puid == uid:
                    # has DUMPS environ var
                    try:
                        process_environ = open("/proc/%s/environ" % process_id, "r")
                    except IOError as ex:
                        # permission or gone, anyways not us
                        continue
                    # /proc/<pid>/environ is NUL-separated; normally a
                    # single "line", so this loop usually runs once
                    for line in process_environ:
                        if line:
                            fields = line.split("\x00")
                            for field in fields:
                                if field.startswith("DUMPS="):
                                    # if no wiki specified for instance, get procs for all
                                    if self.wikiname is None or command_has_wiki(
                                            process_id, self.wikiname):
                                        pids.append(process_id)
                                    break
                    process_environ.close()
        # pids are returned as strings, as read from /proc
        return pids

    def do_kill(self):
        '''
        kill all dump related processes for the wiki specified
        at instantiation or all wikis; good only for processes
        started by the scheduler, the bash wrapper script or
        the python worker script
        '''
        pids = self.get_dump_pids()
        if self.dryrun:
            print "would kill processes", pids
            return
        elif self.verbose:
            print "killing these processes:", pids

        for pid in pids:
            os.kill(int(pid), signal.SIGTERM)

    def do_unlock(self):
        '''
        unlock either wiki specified at instantiation or
        all wikis, provided they were locked on current host
        '''
        lock_info = self.find_dump_lockinfo()
        for wiki in lock_info:
            for lockfile_content in lock_info[wiki]:
                if check_process_running(lockfile_content['pid']):
                    continue
                if self.dryrun:
                    print("would remove lock", lockfile_content['filename'],
                          "for wiki", wiki)
                else:
                    if self.verbose:
                        print "removing lock for", wiki
                    os.unlink(lockfile_content['filename'])

    def find_failed_dumps_for_wiki(self, wikiname):
        '''
        Return (failed job names, run date) for the most recent run
        of the given wiki, or ([], None) when the wiki has never been
        dumped or no run info is available.
        '''
        wiki = Wiki(self.wikiconfs[wikiname], wikiname)
        date = wiki.latest_dump()
        if date is None:
            return [], None

        wiki.set_date(date)
        run_info_file = RunInfoFile(wiki, False)
        entries = run_info_file.get_old_runinfo_from_file()
        if not entries:
            return [], None

        failed_jobs = [entry["name"] for entry in entries
                       if entry["status"] == "failed"]
        return failed_jobs, date

    def find_failed_dumps(self):
        '''
        return dict of failed jobs per wiki during most recent run,
        skipping over wikis with no failed jobs
        '''

        failed_dumps = {}
        for wiki in self.wikilist:
            results, date = self.find_failed_dumps_for_wiki(wiki)
            if results and date is not None:
                failed_dumps[wiki] = {}
                failed_dumps[wiki][date] = results

        if self.verbose:
            print "failed dumps info:", failed_dumps
        return failed_dumps

    def do_rerun(self):
        '''
        Clean up the failed jobs from the latest run, then run them
        again.
        '''
        return self.do_remove(rerun=True)

    def do_remove(self, rerun=False):
        '''
        Find all failed dump jobs from the most recent run, lock each
        affected wiki, clean up its failed jobs, then drop the lock.

        Restricted to the single wiki given at instantiation, when
        one was given. With rerun=True the cleaned-up jobs are run
        again afterwards.
        '''
        failed_dumps = self.find_failed_dumps()
        for wikiname, per_date in failed_dumps.items():
            for date in per_date:
                wiki = Wiki(self.wikiconfs[wikiname], wikiname)
                wiki.set_date(date)
                locker = Locker(wiki, date)
                try:
                    locker.lock()
                except Exception:
                    # could not get the lock; skip this wiki entirely
                    sys.stderr.write("Couldn't lock %s, can't do cleanup\n" % wikiname)
                    continue
                self.cleanup_dump(wiki, per_date[date], rerun=rerun)
                locker.unlock(locker.get_lock_file_path())

    def cleanup_dump(self, wiki, failed_jobs, rerun=False):
        '''
        for the specified wiki, and the given list
        of failed jobs, find all the output files, toss
        them, then rebuild: md5sums file, symlinks
        into latest dir, dump run info file

        with rerun=True, the failed jobs are additionally marked
        to run and re-executed at the end
        '''
        # need to update status files, dumpruninfo, checksums file
        # and latest links.
        runner = Runner(wiki, prefetch=True, spawn=True, job=None,
                        skip_jobs=[], restart=False, notice="", dryrun=False,
                        enabled=None, partnum_todo=False, checkpoint_file=None,
                        page_id_range=None, skipdone=[], cleanup=False, verbose=self.verbose)

        if not failed_jobs:
            if self.verbose:
                print "no failed jobs for wiki", wiki
            return

        if not self.dryrun:
            runner.dumpjobdata.do_before_dump()

        # need to redo the md5sums again of the files we don't toss...
        # so they are copied into the temp file. eeewww
        for job in failed_jobs:
            files = get_job_output_files(wiki, job, runner.dump_item_list.dump_items)
            paths = [runner.dump_dir.filename_public_path(fileinfo) for fileinfo in files]
            if self.verbose:
                print "for job", job, "these are the output files:", paths
            for filename in paths:
                if self.dryrun:
                    print "would unlink", filename
                else:
                    try:
                        os.unlink(filename)
                    except Exception as ex:
                        # best effort: the file may already be gone
                        continue

        if not self.dryrun:
            # refresh the post-job bookkeeping for the jobs we kept
            for item in runner.dump_item_list.dump_items:
                if item.status() == "done":
                    runner.dumpjobdata.do_after_job(item)

        if self.dryrun:
            # trailing comma: python 2 print continuation onto next line
            print "would update dumpruninfo file, checksums file, ",
            print "status file, index.html file and symlinks to latest dir"
            return

        runner.dumpjobdata.do_after_dump(runner.dump_item_list.dump_items)

        if self.verbose:
            print "updating status files for wiki", wiki.db_name

        if runner.dump_item_list.all_possible_jobs_done():
            # All jobs are either in status "done", "waiting", "failed", "skipped"
            runner.indexhtml.update_index_html("done")
            runner.statushtml.update_status_file("done")
        else:
            runner.indexhtml.update_index_html("partialdone")
            runner.statushtml.update_status_file("partialdone")

        if rerun:
            for job in failed_jobs:
                runner.dump_item_list.mark_dumps_to_run(job)
            self.rerun_jobs(runner)

    def log_and_print(self, message):
        '''
        Write the message to stderr, newline-terminated.
        '''
        sys.stderr.write("{0}\n".format(message))

    def debug(self, stuff):
        '''
        Log a message to stderr, prefixed with a human-readable
        timestamp.
        '''
        timestamp = TimeUtils.pretty_time()
        self.log_and_print("%s: %s" % (timestamp, stuff))

    def rerun_jobs(self, runner):
        '''
        run all dump items of the given runner that are marked to
        run, recording failures and refreshing the index/status
        files and post-dump bookkeeping afterwards
        '''
        runner.dumpjobdata.do_before_dump()

        for item in runner.dump_item_list.dump_items:
            if item.to_run():
                item.start()
                runner.indexhtml.update_index_html()
                runner.statushtml.update_status_file()
                runner.dumpjobdata.do_before_job(runner.dump_item_list.dump_items)
                try:
                    item.dump(runner)
                except Exception as ex:
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    if self.verbose:
                        # NOTE(review): in verbose mode the item is never
                        # explicitly marked "failed" — confirm this
                        # asymmetry with the branch below is intended
                        sys.stderr.write(repr(traceback.format_exception(
                            exc_type, exc_value, exc_traceback)))
                    else:
                        if exc_type.__name__ == 'BackupPrereqError':
                            self.debug(str(ex))
                        else:
                            self.debug("*** exception! " + str(ex))
                            # NOTE(review): this inner check is always true
                            # here — the prereq case was already handled in
                            # the branch above
                            if exc_type.__name__ != 'BackupPrereqError':
                                item.set_status("failed")

                # Here for example status is "failed". But maybe also
                # "in-progress", if an item chooses to override dump(...) and
                # forgets to set the status. This is a failure as well.
                if item.status() not in ["done", "waiting", "skipped"]:
                    runner.failurehandler.report_failure()
                    runner.failurehandler.failure_count += 1

            if item.status() == "done":
                runner.dumpjobdata.do_after_job(item)
            elif item.status() == "waiting" or item.status() == "skipped":
                continue
            else:
                # failure
                continue

        if runner.dump_item_list.all_possible_jobs_done():
            # All jobs are either in status "done", "waiting", "failed", "skipped"
            # NOTE(review): update_index_html_file here vs update_index_html
            # everywhere else — confirm both methods exist on indexhtml
            runner.indexhtml.update_index_html_file("done")
            runner.statushtml.update_status_file("done")
        else:
            # This may happen if we start a dump now and abort before all items are
            # done. Then some are left for example in state "waiting". When
            # afterwards running a specific job, all (but one) of the jobs
            # previously in "waiting" are still in status "waiting"
            runner.indexhtml.update_index_html("partialdone")
            runner.statushtml.update_status_file("partialdone")

        runner.dumpjobdata.do_after_dump(runner.dump_item_list.dump_items)

    def do_maintenance(self):
        '''
        create an empty maintenance.txt file
        causes the dump runners after the next job
        to run no jobs per wiki
        and sleep 5 minutes in between each wiki

        this is a global action that affects all wikis
        run on the given host
        '''
        if self.dryrun:
            print "would create maintenance file"
            return
        elif self.verbose:
            print "creating maintenance file"
        create_file("maintenance.txt")

    def do_exit(self):
        '''
        create an empty exit.txt file; causes the
        dump runners to exit after next job

        this is a global action that affects all wikis
        run on the given host
        '''
        if self.dryrun:
            print "would create exit file"
            return
        elif self.verbose:
            print "creating exit file"
        create_file("exit.txt")

    def do_show(self):
        '''
        show specified information for all wikis
        '''
        if self.show == 'lastrun':
            dbinfo = self.conf.db_latest_status()
            dbdates = [date for (_dbname, _status, date) in dbinfo if date is not None]
            dbdates = sorted(dbdates)
            if not len(dbdates):
                print ""
            else:
                print dbdates[-1]
        elif self.show == "alldone":
            dbinfo = self.conf.db_latest_status()
            # skip cases where there is no status file. maybe we will revisit this later
            statuses = [status for (_dbname, status, _date) in dbinfo if status is not None]
            for status in statuses:
                if status != "complete":
                    print ""
                    break
            else:
                print "True"
        elif (self.show in ["failed", "aborted", "missing", "progress",
                            "partial", "complete", "not yet"]):
            dbinfo = self.conf.db_latest_status()
            # skip cases where there is no status file. maybe we will revisit this later
            dbs_to_show = [dbname for (dbname, status, date) in dbinfo if status == self.show]
            if dbs_to_show:
                print dbs_to_show
        else:
            print "No such known element for 'show'"

    def do_notice(self, wikiname):
        '''
        create a notice.txt file for the particular wiki for
        the most recent run. the contents will appear on its
        web page for that dump run
        '''
        wiki = Wiki(self.wikiconfs[wikiname], wikiname)
        date = wiki.latest_dump()
        if date is None:
            print "dump never run, not adding notice file for wiki", wikiname
            return

        if self.dryrun:
            print "would add notice.txt for wiki", wikiname, "date", date
            return
        elif self.verbose:
            print "creating notice file for wiki", wikiname, "date", date

        wiki.set_date(date)
        NoticeFile(wiki, self.message, True)

    def do_mark(self, wikiname):
        '''
        mark the specified job with the specified status.
        '''

        wiki = Wiki(self.wikiconfs[wikiname], wikiname)
        date = wiki.latest_dump()
        if date is None:
            print "dump never run, not marking job for wiki", wikiname
            return
        wiki.set_date(date)

        runner = Runner(wiki, prefetch=True, spawn=True, job=None,
                        skip_jobs=[], restart=False, notice="", dryrun=False,
                        enabled=None, partnum_todo=False, checkpoint_file=None,
                        page_id_range=None, skipdone=[], cleanup=False, verbose=self.verbose)

        known_jobs = [item.name() for item in runner.dump_item_list.dump_items] + ['tables']
        if ':' in self.job_status:
            job, status = self.job_status.split(":", 1)
            if status not in ["done", "failed"]:
                status = None
            if job not in known_jobs:
                job = None
        if job is None or status is None:
            print "bad or no job/status specified", self.job_status
            if self.verbose:
                print "known jobs", known_jobs
            return

        runner.dumpjobdata.do_before_dump()

        for item in runner.dump_item_list.dump_items:
            if item.name() == job:
                item.set_status(status, True)
            if item.status() == "done":
                runner.dumpjobdata.do_after_job(item)
            elif item.status() not in ["done", "waiting", "skipped"]:
                runner.failurehandler.failure_count += 1

        if self.verbose:
            print "updating status files for wiki", wiki.db_name
        if runner.dump_item_list.all_possible_jobs_done():
            # All jobs are either in status "done", "waiting", "failed", "skipped"
            runner.indexhtml.update_index_html("done")
            runner.statushtml.update_status_file("done")
        else:
            runner.indexhtml.update_index_html("partialdone")
            runner.statushtml.update_status_file("partialdone")

        runner.dumpjobdata.do_after_dump(runner.dump_item_list.dump_items)
        return

    def undo_maintenance(self):
        '''
        remove any maintenance.txt file that may exist,
        resumes normal operations
        '''
        if self.dryrun:
            print "would remove maintenance file"
            return
        elif self.verbose:
            print "removing maintenance file"
        remove_file("maintenance.txt")

    def undo_exit(self):
        '''
        remove any exit.txt file that may exist,
        resumes normal operations
        '''
        if self.dryrun:
            print "would remove exit file"
            return
        elif self.verbose:
            print "removing exit file"
        remove_file("exit.txt")

    def undo_notice(self, wikiname):
        '''
        remove any notice.txt file that may exist
        for the most current run for the given wiki
        '''
        wiki = Wiki(self.wikiconfs[wikiname], wikiname)
        date = wiki.latest_dump()
        if date is None:
            print "dump never run, no notice file to remove for wiki", wikiname
            return

        if self.dryrun:
            print "would remove notice.txt for wiki", wikiname, "date", date
            return
        elif self.verbose:
            print "removing notice file for wiki", wikiname, "date", date

        wiki.set_date(date)
        NoticeFile(wiki, False, True)

    def find_dump_lockinfo(self):
        '''
        Collect host, pid and lockfile path for dump locks taken on
        the current host, either for the wiki given at instantiation
        or for all wikis. Returns a dict of wiki -> list of lock info
        dicts.
        '''
        my_hostname = socket.getfqdn()

        # a named wiki restricts the glob to its own private dir
        if self.wikiname is not None:
            pattern = os.path.join(self.wikiconfs[self.wikiname].private_dir,
                                   self.wikiname, "lock_*")
        else:
            pattern = os.path.join(self.conf.private_dir, "*", "lock_*")

        results = {}
        for filename in glob.glob(pattern):
            host, pid = get_lockfile_content(filename)
            if host != my_hostname:
                # lock taken on some other host; not ours to report
                continue
            wiki = self.get_wiki_from_lockfilename(filename)
            results.setdefault(wiki, []).append(
                {'pid': pid, 'host': host, 'filename': filename})
        return results

    def get_wiki_from_lockfilename(self, filename):
        '''
        Extract and return the wiki name embedded in a full lockfile
        path, or None if the path does not look like a lockfile under
        the private dir.
        '''
        private_dir = self.conf.private_dir
        if "lock" not in filename or not filename.startswith(private_dir):
            return None
        # strip the private dir prefix, leaving wikiname/lock_...
        relative = filename[len(private_dir):].lstrip(os.path.sep)
        return relative.split(os.path.sep)[0]
예제 #6
0
def main():
    os.environ['DUMPS'] = str(os.getpid())

    try:
        date = None
        config_file = False
        force_lock = False
        prefetch = True
        prefetchdate = None
        spawn = True
        restart = False
        jobs_requested = None
        skip_jobs = None
        enable_logging = False
        html_notice = ""
        dryrun = False
        partnum_todo = None
        after_checkpoint = False
        checkpoint_file = None
        page_id_range = None
        cutoff = None
        exitcode = 1
        skipdone = False
        do_locking = False
        verbose = False
        cleanup_files = False
        do_prereqs = False

        try:
            (options, remainder) = getopt.gnu_getopt(
                sys.argv[1:], "",
                ['date=', 'job=', 'skipjobs=', 'configfile=', 'addnotice=',
                 'delnotice', 'force', 'dryrun', 'noprefetch', 'prefetchdate=',
                 'nospawn', 'restartfrom', 'aftercheckpoint=', 'log', 'partnum=',
                 'checkpoint=', 'pageidrange=', 'cutoff=', "skipdone",
                 "exclusive", "prereqs", "cleanup", 'verbose'])
        except Exception as ex:
            usage("Unknown option specified")

        for (opt, val) in options:
            if opt == "--date":
                date = val
            elif opt == "--configfile":
                config_file = val
            elif opt == '--checkpoint':
                checkpoint_file = val
            elif opt == '--partnum':
                partnum_todo = int(val)
            elif opt == "--force":
                force_lock = True
            elif opt == '--aftercheckpoint':
                after_checkpoint = True
                checkpoint_file = val
            elif opt == "--noprefetch":
                prefetch = False
            elif opt == "--prefetchdate":
                prefetchdate = val
            elif opt == "--nospawn":
                spawn = False
            elif opt == "--dryrun":
                dryrun = True
            elif opt == "--job":
                jobs_requested = val
            elif opt == "--skipjobs":
                skip_jobs = val
            elif opt == "--restartfrom":
                restart = True
            elif opt == "--log":
                enable_logging = True
            elif opt == "--addnotice":
                html_notice = val
            elif opt == "--delnotice":
                html_notice = False
            elif opt == "--pageidrange":
                page_id_range = val
            elif opt == "--cutoff":
                cutoff = val
                if not cutoff.isdigit() or not len(cutoff) == 8:
                    usage("--cutoff value must be in yyyymmdd format")
            elif opt == "--skipdone":
                skipdone = True
            elif opt == "--cleanup":
                cleanup_files = True
            elif opt == "--exclusive":
                do_locking = True
            elif opt == "--verbose":
                verbose = True
            elif opt == "--prereqs":
                do_prereqs = True

        if jobs_requested is not None:
            if ',' in jobs_requested:
                jobs_todo = jobs_requested.split(',')
            else:
                jobs_todo = [jobs_requested]
        else:
            jobs_todo = []

        if dryrun and (len(remainder) == 0):
            usage("--dryrun requires the name of a wikidb to be specified")
        if restart and not jobs_requested:
            usage("--restartfrom requires --job and the job from which to restart")
        if restart and len(jobs_todo) > 1:
            usage("--restartfrom requires --job and exactly one job from which to restart")
        if partnum_todo is not None and not jobs_requested:
            usage("--partnum option requires specific job(s) for which to rerun that part")
        if partnum_todo is not None and restart:
            usage("--partnum option can be specified only for a specific list of jobs")
        if checkpoint_file is not None and (len(remainder) == 0):
            usage("--checkpoint option requires the name of a wikidb to be specified")
        if checkpoint_file is not None and not jobs_requested:
            usage("--checkpoint option requires --job")
        if page_id_range and not jobs_requested:
            usage("--pageidrange option requires --job")
        if page_id_range and checkpoint_file is not None:
            usage("--pageidrange option cannot be used with --checkpoint option")
        if prefetchdate is not None and not prefetch:
            usage("prefetchdate and noprefetch options may not be specified together")
        if prefetchdate is not None and (not prefetchdate.isdigit() or len(prefetchdate) != 8):
            usage("prefetchdate must be of the form YYYYMMDD")
        if skip_jobs is None:
            skip_jobs = []
        else:
            skip_jobs = skip_jobs.split(",")

        # allow alternate config file
        if config_file:
            config = Config(config_file)
        else:
            config = Config()
        externals = ['php', 'mysql', 'mysqldump', 'head', 'tail',
                     'checkforbz2footer', 'grep', 'gzip', 'bzip2',
                     'writeuptopageid', 'recompressxml', 'sevenzip', 'cat']

        failed = False
        unknowns = []
        notfound = []
        for external in externals:
            try:
                ext = getattr(config, external)
            except AttributeError as ex:
                unknowns.append(external)
                failed = True
            else:
                if not exists(ext):
                    notfound.append(ext)
                    failed = True
        if failed:
            if unknowns:
                sys.stderr.write("Unknown config param(s): %s\n" % ", ".join(unknowns))
            if notfound:
                sys.stderr.write("Command(s) not found: %s\n" % ", ".join(notfound))
            sys.stderr.write("Exiting.\n")
            sys.exit(1)

        if (dryrun or partnum_todo is not None or
                (jobs_requested is not None and
                 not restart and
                 not do_locking and
                 not force_lock)):
            locks_enabled = False
        else:
            locks_enabled = True

        if dryrun:
            print "***"
            print "Dry run only, no files will be updated."
            print "***"

        if len(remainder) > 0:
            wiki = Wiki(config, remainder[0])
            if cutoff:
                # fixme if we asked for a specific job then check that job only
                # not the dir
                last_ran = wiki.latest_dump()
                if last_ran >= cutoff:
                    wiki = None
            if wiki is not None and locks_enabled:
                locker = Locker(wiki, date)
                if force_lock and locks_enabled:
                    lockfiles = locker.is_locked()
                    locker.unlock(lockfiles, owner=False)
                if locks_enabled:
                    locker.lock()

        else:
            # if the run is across all wikis and we are just doing one job,
            # we want the age of the wikis by the latest status update
            # and not the date the run started

            if jobs_requested is not None and jobs_requested[0] == 'createdirs':
                check_status_time = False
                # there won't actually be a status for this job but we want
                # to ensure that the directory and the status file are present
                # and intact
                check_job_status = True
                check_prereq_status = False
            else:
                check_status_time = bool(jobs_requested is not None)
                check_job_status = bool(skipdone)
                check_prereq_status = bool(jobs_requested is not None and skipdone)
            wiki = find_lock_next_wiki(config, locks_enabled, cutoff, prefetch,
                                       prefetchdate, spawn,
                                       dryrun, html_notice, check_status_time,
                                       check_job_status, check_prereq_status, date,
                                       jobs_todo[0] if len(jobs_todo) else None,
                                       skip_jobs, page_id_range,
                                       partnum_todo, checkpoint_file, skipdone, restart, verbose)

        if wiki is not None and wiki:
            # process any per-project configuration options
            config.parse_conffile_per_project(wiki.db_name)

            if date == 'last':
                dumps = sorted(wiki.dump_dirs())
                if dumps:
                    date = dumps[-1]
                else:
                    date = None

            if date is None or not date:
                date = TimeUtils.today()
            wiki.set_date(date)

            if after_checkpoint:
                fname = DumpFilename(wiki)
                fname.new_from_filename(checkpoint_file)
                if not fname.is_checkpoint_file:
                    usage("--aftercheckpoint option requires the "
                          "name of a checkpoint file, bad filename provided")
                page_id_range = str(int(fname.last_page_id) + 1)
                partnum_todo = fname.partnum_int
                # now we don't need this.
                checkpoint_file = None
                after_checkpoint_jobs = ['articlesdump', 'metacurrentdump',
                                         'metahistorybz2dump']
                if (jobs_requested is None or
                        not set(jobs_requested).issubset(set(after_checkpoint_jobs))):
                    usage("--aftercheckpoint option requires --job option with one or more of %s"
                          % ", ".join(after_checkpoint_jobs))

            enabled = {}
            if enable_logging:
                enabled = {"logging": True}

            if restart:
                sys.stderr.write("Running %s, restarting from job %s...\n" %
                                 (wiki.db_name, jobs_todo[0]))
            elif jobs_requested:
                sys.stderr.write("Running %s, jobs %s...\n" % (wiki.db_name, jobs_requested))
            else:
                sys.stderr.write("Running %s...\n" % wiki.db_name)

            # no specific jobs requested, runner will do them all
            if not len(jobs_todo):
                runner = Runner(wiki, prefetch, prefetchdate, spawn, None, skip_jobs,
                                restart, html_notice, dryrun, enabled,
                                partnum_todo, checkpoint_file, page_id_range, skipdone,
                                cleanup_files, do_prereqs, verbose)

                result = runner.run()
                if result is not None and result:
                    exitcode = 0

            else:
                # do each job requested one at a time
                for job in jobs_todo:
                    runner = Runner(wiki, prefetch, prefetchdate, spawn, job, skip_jobs,
                                    restart, html_notice, dryrun, enabled,
                                    partnum_todo, checkpoint_file, page_id_range, skipdone,
                                    cleanup_files, do_prereqs, verbose)

                    result = runner.run()
                    if result is not None and result:
                        exitcode = 0

            # if we are doing one piece only of the dump, we don't unlock either
            if locks_enabled:
                locker = Locker(wiki, date)
                lockfiles = locker.is_locked()
                locker.unlock(lockfiles, owner=True)
        elif wiki is not None:
            sys.stderr.write("Wikis available to run but prereqs not complete.\n")
            exitcode = 0
        else:
            sys.stderr.write("No wikis available to run.\n")
            exitcode = 255
    finally:
        cleanup()
    sys.exit(exitcode)