Esempio n. 1
0
    def do_run_item(self, item):
        """
        Run a single dump job (item), provided it has been marked to run.

        Maintenance.exit_if_in_maintenance_mode is called first, before the
        item is looked at. If the item is to be run, the status/report files
        are refreshed and item.dump(self) is invoked; any Exception it raises
        is caught here rather than propagated.

        After that, whatever status the item now reports decides what
        happens: "done" triggers the after-job processing, "waiting" and
        "skipped" are left alone, and anything else is handled as a failure.

        Returns the name of a missing prerequisite job, taken from an error
        message starting with "Required job ", or None if there was none.
        """
        Maintenance.exit_if_in_maintenance_mode(
            "In maintenance mode, exiting dump of %s at step %s"
            % (self.db_name, item.name()))

        missing_prereq = None

        if item.to_run():
            item.start()
            self.report.update_index_html_and_json()
            self.statushtml.update_status_file()
            self.runstatus_updater.write_statusapi_file()
            self.specialfiles_updater.write_specialfilesinfo_file()

            self.dumpjobdata.do_before_job(self.dump_item_list.dump_items)

            try:
                item.dump(self)
            except Exception as ex:
                err_class, err_value, err_tb = sys.exc_info()

                def formatted_trace():
                    # repr of the list of traceback lines, built only on demand
                    return repr(traceback.format_exception(
                        err_class, err_value, err_tb))

                if self.verbose:
                    sys.stderr.write(formatted_trace())

                # the exception classes are recognized by name only
                if err_class.__name__ in ('BackupPrereqError', 'BackupError'):
                    message = str(ex)
                    if message.startswith("Required job "):
                        # third word of the message is the job name
                        missing_prereq = message.split(" ")[2]
                        self.debug(message)

                if missing_prereq is None:
                    # the failure is unrelated to a missing prerequisite
                    self.debug("*** exception! " + str(ex))
                    self.debug(formatted_trace())
                    item.set_status("failed")

        outcome = item.status()
        if outcome == "done":
            self.dumpjobdata.do_after_job(item, self.dump_item_list.dump_items)
        elif outcome not in ("waiting", "skipped"):
            # Typically "failed" at this point, but "in-progress" lands here
            # too: an item that overrides dump(...) without ever setting its
            # status is treated as a failure as well. For "waiting" and
            # "skipped" nothing is done, so the checksum files for the item
            # are not updated.
            self.run_handle_failure()
        return missing_prereq
Esempio n. 2
0
    def run(self):
        """
        Mark the jobs to run (one requested job, that job plus the ones
        following it on restart, or all jobs), run them, and then write out
        the final status.

        Returns None when a job was requested but nothing could be marked to
        run. Otherwise returns False if self.failurehandler.failure_count is
        greater than zero and True if it is not.

        Raises RuntimeError when the run information of a previous run for
        this date is unavailable and the user does not answer "y" or "Y" at
        the prompt.
        """
        if not self.job_requested:
            self.dump_item_list.mark_all_jobs_to_run(self.skipdone)
        else:
            if (not self.dump_item_list.old_runinfo_retrieved and
                    self.wiki.exists_perdump_index()):
                # Part or all of this date was run before, yet the old
                # RunInfo could not be had (missing, or an error while
                # fetching it). Rerunning a step now means the status
                # information of that earlier run cannot be kept, so the
                # user has to confirm explicitly.
                for warning_line in (
                        "No information about the previous run for this date could be retrieved.",
                        "This means that the status information about the old run will be lost, and",
                        "only the information about the current (and future) runs will be kept."):
                    print(warning_line)
                answer = raw_input("Continue anyways? [y/N]: ")
                if answer not in ("y", "Y"):
                    raise RuntimeError("No run information available for previous dump, exiting")

            if not self.dump_item_list.mark_dumps_to_run(self.job_requested, self.skipdone):
                # most likely the requested job does not exist
                sys.stderr.write("No job marked to run, exiting")
                return None
            if self.restart:
                # a restart also covers every job after the requested one
                self.dump_item_list.mark_following_jobs_to_run(self.skipdone)

        Maintenance.exit_if_in_maintenance_mode(
            "In maintenance mode, exiting dump of %s" % self.db_name)

        # public directory first, then the private one
        for base_dir in (self.wiki.public_dir, self.wiki.private_dir):
            self.make_dir(os.path.join(base_dir(), self.wiki.date))

        self.show_runner_state("Cleaning up old dumps for %s" % self.db_name)
        self.clean_old_dumps()
        self.clean_old_dumps(private=True)

        # announce the kind of work that is about to start
        if not self.job_requested:
            self.show_runner_state("Starting backup of %s" % self.db_name)
        elif self.restart:
            self.log_and_print("Preparing for restart from job %s of %s"
                               % (self.job_requested, self.db_name))
        else:
            self.log_and_print("Preparing for job %s of %s" %
                               (self.job_requested, self.db_name))

        self.dumpjobdata.do_before_dump()

        # longest chain of unmet prerequisites we are willing to follow;
        # anything longer is taken to mean something is really broken
        max_chain = 5
        for item in self.dump_item_list.dump_items:
            needed_job = self.do_run_item(item)
            if not self.do_prereqs or needed_job is None:
                continue

            # The lock is already ours, so run the missing prerequisite right
            # away. Prerequisites can themselves lack prerequisites
            # (e.g. articlesrecombine -> articles -> stubs), hence the walk
            # back up the list.
            pending = [item]
            while needed_job is not None and len(pending) < max_chain:
                prereq_item = self.dump_item_list.find_item_by_name(needed_job)
                prereq_item.set_to_run(True)
                needed_job = self.do_run_item(prereq_item)
                if needed_job is not None:
                    # this one is blocked as well; queue it ahead of the rest
                    pending.insert(0, prereq_item)

            # rerun the blocked jobs, unless the chain grew too long
            if len(pending) < max_chain:
                for dependent in pending:
                    self.do_run_item(dependent)

        # special case
        if self.job_requested == "createdirs":
            for base_dir in (self.wiki.public_dir, self.wiki.private_dir):
                dated_dir = os.path.join(base_dir(), self.wiki.date)
                if not os.path.exists(dated_dir):
                    os.makedirs(dated_dir)

        # "done": every job is in status "done", "waiting", "failed" or
        # "skipped". "partialdone": can happen when a dump was started and
        # aborted before all items finished, leaving some e.g. in "waiting";
        # running one specific job afterwards leaves all the others that were
        # "waiting" in that same status.
        if self.dump_item_list.all_possible_jobs_done():
            final_state = "done"
        else:
            final_state = "partialdone"
        self.indexhtml.update_index_html(final_state)
        self.statushtml.update_status_file(final_state)

        self.dumpjobdata.do_after_dump(self.dump_item_list.dump_items)

        # special case
        if (self.job_requested == "latestlinks" and
                self.dump_item_list.all_possible_jobs_done()):
            self.dumpjobdata.do_latest_job()

        # report completion
        if not self.job_requested:
            self.show_runner_state_complete()
        elif self.restart:
            self.show_runner_state("Completed run restarting from job %s for %s"
                                   % (self.job_requested, self.db_name))
        else:
            self.show_runner_state("Completed job %s for %s"
                                   % (self.job_requested, self.db_name))

        # tell the caller whether the run went through without failures
        return not (self.failurehandler.failure_count > 0)