def do_run_item(self, item):
    """
    Run one dump job (item) if it is flagged to run, then act on the
    status the item ends up with.

    Returns the name of a prerequisite job that the item reported as
    required (taken from a "Required job <name> ..." error message), or
    None when no such prerequisite was reported.
    """
    Maintenance.exit_if_in_maintenance_mode(
        "In maintenance mode, exiting dump of %s at step %s"
        % (self.db_name, item.name()))

    missing_prereq = None

    if item.to_run():
        item.start()
        # the item has been started; refresh every status output
        self.report.update_index_html_and_json()
        self.statushtml.update_status_file()
        self.runstatus_updater.write_statusapi_file()
        self.specialfiles_updater.write_specialfilesinfo_file()

        self.dumpjobdata.do_before_job(self.dump_item_list.dump_items)

        try:
            item.dump(self)
        except Exception as ex:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            trace_text = repr(traceback.format_exception(
                exc_type, exc_value, exc_traceback))
            if self.verbose:
                sys.stderr.write(trace_text)

            message = str(ex)
            # the exception classes are matched by name, not by isinstance
            if (exc_type.__name__ in ('BackupPrereqError', 'BackupError') and
                    message.startswith("Required job ")):
                # third word of "Required job <name> ..." is the job name
                missing_prereq = message.split(" ")[2]
                self.debug(message)

            if missing_prereq is None:
                # the failure is unrelated to a missing prerequisite
                self.debug("*** exception! " + message)
                self.debug(trace_text)
                item.set_status("failed")

    status = item.status()
    if status == "done":
        self.dumpjobdata.do_after_job(item, self.dump_item_list.dump_items)
    elif status not in ("waiting", "skipped"):
        # Anything else counts as a failure: "failed" obviously, but also
        # e.g. "in-progress" when an item overrides dump(...) and never
        # sets its status.  ("waiting" and "skipped" items are left alone,
        # so their checksum files are not updated.)
        self.run_handle_failure()

    return missing_prereq
def run(self):
    """
    Run the requested job, or all jobs when no specific job was requested.

    Marks the dump items to run, creates the dated public and private
    directories, cleans up old dumps, runs each dump item (running unmet
    prerequisite jobs first when self.do_prereqs is set), and finally
    updates the index/status files and reports completion.

    Returns None when no job could be marked to run, False when the
    failure handler counted at least one failure, True otherwise.
    Raises RuntimeError when the user declines to continue without the
    run information of a previous run.
    """
    if self.job_requested:
        if (not self.dump_item_list.old_runinfo_retrieved and
                self.wiki.exists_perdump_index()):
            # Part or all of this date was run before, but the old RunInfo
            # could not be retrieved (missing, or an error while reading
            # it), so a rerun of a step cannot preserve the status
            # information of the old run.  Make the user confirm first.
            print("No information about the previous run for this date could be retrieved.")
            print("This means that the status information about the old run will be lost, and")
            print("only the information about the current (and future) runs will be kept.")
            answer = raw_input("Continue anyways? [y/N]: ")
            if answer not in ("y", "Y"):
                raise RuntimeError("No run information available for previous dump, exiting")

        marked = self.dump_item_list.mark_dumps_to_run(self.job_requested, self.skipdone)
        if not marked:
            # most likely there is no job with that name
            sys.stderr.write("No job marked to run, exiting")
            return None
        if self.restart:
            # a restart also runs every job after the requested one
            self.dump_item_list.mark_following_jobs_to_run(self.skipdone)
    else:
        self.dump_item_list.mark_all_jobs_to_run(self.skipdone)

    Maintenance.exit_if_in_maintenance_mode(
        "In maintenance mode, exiting dump of %s" % self.db_name)

    for get_base_dir in (self.wiki.public_dir, self.wiki.private_dir):
        self.make_dir(os.path.join(get_base_dir(), self.wiki.date))

    self.show_runner_state("Cleaning up old dumps for %s" % self.db_name)
    self.clean_old_dumps()
    self.clean_old_dumps(private=True)

    # announce what kind of backup work is about to start
    if self.job_requested:
        if self.restart:
            start_message = "Preparing for restart from job %s of %s"
        else:
            start_message = "Preparing for job %s of %s"
        self.log_and_print(start_message % (self.job_requested, self.db_name))
    else:
        self.show_runner_state("Starting backup of %s" % self.db_name)

    self.dumpjobdata.do_before_dump()

    # longest chain of unmet prerequisites we are willing to walk back;
    # anything longer is assumed to mean something is really broken
    max_chain = 5

    for item in self.dump_item_list.dump_items:
        prereq_job = self.do_run_item(item)
        if not self.do_prereqs or prereq_job is None:
            continue

        # We hold the lock anyway, so run the missing prerequisite now.
        # Prerequisites can chain (articlesrecombine -> articles -> stubs),
        # so keep a stack of jobs still waiting on something.
        pending = [item]
        while prereq_job is not None and len(pending) < max_chain:
            # NOTE(review): the lookup result is used unchecked; if
            # find_item_by_name can return None this raises AttributeError
            # -- confirm against its implementation.
            prereq_item = self.dump_item_list.find_item_by_name(prereq_job)
            prereq_item.set_to_run(True)
            prereq_job = self.do_run_item(prereq_item)
            if prereq_job is not None:
                # this prerequisite has a dependency of its own; stack it
                pending.insert(0, prereq_item)

        # unwind the stack and run the dependents, unless it grew too long
        if len(pending) < max_chain:
            for dependent in pending:
                self.do_run_item(dependent)

    # special case
    if self.job_requested == "createdirs":
        for get_base_dir in (self.wiki.public_dir, self.wiki.private_dir):
            dated_dir = os.path.join(get_base_dir(), self.wiki.date)
            if not os.path.exists(dated_dir):
                os.makedirs(dated_dir)

    # "done": every job is in status "done", "waiting", "failed" or
    # "skipped".  "partialdone" can happen when an earlier dump was aborted
    # before all items finished, leaving some in e.g. "waiting"; running a
    # single job afterwards leaves all but one of those still "waiting".
    if self.dump_item_list.all_possible_jobs_done():
        final_status = "done"
    else:
        final_status = "partialdone"
    # NOTE(review): do_run_item updates the index through
    # self.report.update_index_html_and_json(); confirm that
    # self.indexhtml is still the intended object here.
    self.indexhtml.update_index_html(final_status)
    self.statushtml.update_status_file(final_status)

    self.dumpjobdata.do_after_dump(self.dump_item_list.dump_items)

    # special case
    if (self.job_requested == "latestlinks" and
            self.dump_item_list.all_possible_jobs_done()):
        self.dumpjobdata.do_latest_job()

    # announce completion
    if self.job_requested:
        if self.restart:
            done_message = "Completed run restarting from job %s for %s"
        else:
            done_message = "Completed job %s for %s"
        self.show_runner_state(done_message % (self.job_requested, self.db_name))
    else:
        self.show_runner_state_complete()

    # tell the caller whether the run was successful
    return not self.failurehandler.failure_count > 0