def main(): jobids = set([int(id) for id in args]) jobids.update(options.jobids) for fn in options.files: rows = jobtracker.query("SELECT job_files.job_id FROM job_files " \ "LEFT JOIN files " \ "ON files.id = job_files.file_id " \ "WHERE files.filename LIKE '%%%s' " % fn) for row in rows: jobids.add(row['job_id']) print "Number of jobs to kill: %d" % len(jobids) for jobid in jobids: print "Attempting to kill job with id %d" % jobid row = jobtracker.query("SELECT status FROM jobs " \ "WHERE id=%d" % jobid, \ fetchone=True) if row['status'] in ['new', 'retrying']: jobtracker.query("UPDATE jobs " \ "SET status='terminal_failure', " \ "updated_at='%s', " \ "details='Job was killed manually' " \ "WHERE id=%d" % \ (jobtracker.nowstr(), jobid)) print "Job's status has been set to 'terminal_failure'" pipeline_utils.clean_up(jobid) else: print "Only jobs whose status is 'waiting' or 'retrying' " \ "can be killed. (Current status of job %d: %s)" % \ (jobid, row['status'])
def recover_failed_jobs():
    """Scan the job-tracker DB for jobs marked 'failed' and decide their fate.

    A failed job that still has submission attempts left (fewer prior
    submits than config.jobpooler.max_attempts) is flagged 'retrying'.
    A job that has exhausted its attempts is flagged 'terminal_failure'
    and, when the pipeline is configured for it, its raw data is deleted.
    """
    for failed in jobtracker.query("SELECT * FROM jobs " \
                                   "WHERE status='failed'"):
        # How many times has this job already been submitted?
        prior_submits = jobtracker.query("SELECT * FROM job_submits " \
                                         "WHERE job_id=%d " \
                                         "ORDER BY id DESC" % failed['id'])
        exhausted = len(prior_submits) >= config.jobpooler.max_attempts
        if not exhausted:
            # Attempts remain: queue the job for another try.
            jobtracker.query("UPDATE jobs " \
                             "SET status='retrying', " \
                             "updated_at='%s', " \
                             "details='Job will be retried' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), failed['id']))
            jobpool_cout.outs("Job #%d will be retried." % failed['id'])
        else:
            # Out of attempts: optionally drop the raw data, then mark
            # the job as permanently failed.
            if config.basic.delete_rawdata:
                pipeline_utils.clean_up(failed['id'])
            jobtracker.query("UPDATE jobs " \
                             "SET status='terminal_failure', " \
                             "updated_at='%s', " \
                             "details='Job has failed permanently' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), failed['id']))
queries.append("UPDATE jobs " \ "SET status='uploaded', " \ "details='Upload successful (header_id=%d)', " \ "updated_at='%s' " \ "WHERE id=%d" % \ (header_id, jobtracker.nowstr(), job_submit['job_id'])) jobtracker.query(queries) print "Results successfully uploaded" # Tar and Copy final results to HPSS print "Copy results to HPSS" pipeline_utils.copy_results_to_HPSS(dir) if config.basic.delete_rawdata: pipeline_utils.clean_up(job_submit['job_id']) if debug.UPLOAD: upload.upload_timing_summary['End-to-end'] = \ upload.upload_timing_summary.setdefault('End-to-end', 0) + \ (time.time()-starttime) print "Upload timing summary:" for k in sorted(upload.upload_timing_summary.keys()): print " %s: %.2f s" % (k, upload.upload_timing_summary[k]) print "" # Just a blank line def get_fitsfiles(job_submit): """Find the fits files associated with this job. There should be a single file in the job's result
"details='Upload successful (header_id=%d)', " \ "updated_at='%s' " \ "WHERE id=%d" % (header_id, jobtracker.nowstr(), job_submit['id'])) queries.append("UPDATE jobs " \ "SET status='uploaded', " \ "details='Upload successful (header_id=%d)', " \ "updated_at='%s' " \ "WHERE id=%d" % \ (header_id, jobtracker.nowstr(), job_submit['job_id'])) jobtracker.query(queries) print "Results successfully uploaded" if config.basic.delete_rawdata: pipeline_utils.clean_up(job_submit['job_id']) if debug.UPLOAD: upload.upload_timing_summary['End-to-end'] = \ upload.upload_timing_summary.setdefault('End-to-end', 0) + \ (time.time()-starttime) print "Upload timing summary:" for k in sorted(upload.upload_timing_summary.keys()): print " %s: %.2f s" % (k, upload.upload_timing_summary[k]) print "" # Just a blank line def get_fitsfiles(job_submit): """Find the fits files associated with this job. There should be a single file in the job's result directory.
def recover_failed_jobs():
    """Gather jobs with status 'failed' from the job-tracker DB.

    For each of these jobs see if it can be re-submitted.
    If it can, set the status to 'retrying'. If the job
    cannot be re-submitted, set the status to 'terminal_failure',
    and delete the raw data (if config is set for deletion).

    Depending on configurations emails may be sent.
    """
    failed_jobs = jobtracker.query("SELECT * FROM jobs " \
                                   "WHERE status='failed'")
    for job in failed_jobs:
        # Count the number of times this job has been submitted already
        submits = jobtracker.query("SELECT * FROM job_submits " \
                                   "WHERE job_id=%d " \
                                   "ORDER BY id DESC" % job['id'])
        if len(submits) < config.jobpooler.max_attempts:
            # We can re-submit this job.
            if config.email.send_on_failures:
                # Send error email
                msg = "Error! Job submit status: %s\n" % \
                      submits[0]['status']
                msg += "Job ID: %d, Job submit ID: %d\n\n" % \
                       (job['id'], submits[0]['id'])
                msg += str(submits[0]['details'])
                msg += "\n*** Job will be re-submitted to the queue ***\n"
                mailer.ErrorMailer(msg).send()
            # Set status to 'retrying'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='retrying', " \
                             "updated_at='%s', " \
                             "details='Job will be retried' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
            jobpool_cout.outs("Job #%d will be retried." % job['id'])
        else:
            # We've run out of attempts for this job
            # BUGFIX: the second condition used to read
            # 'config.email.send_on_failure' (singular). That attribute is
            # not what the retry branch above uses ('send_on_failures') and
            # would raise AttributeError when the first condition is False.
            if config.email.send_on_terminal_failures or \
                    config.email.send_on_failures:
                # Send error email
                msg = "Error! Job submit status: %s\n" % \
                      str(submits[0]['status'])
                msg += "Job ID: %d, Job submit ID: %d\n\n" % \
                       (job['id'], submits[0]['id'])
                msg += str(submits[0]['details'])
                msg += "\n*** No more attempts for this job. ***\n"
                msg += "*** Job will NOT be re-submitted! ***\n"
                if config.basic.delete_rawdata:
                    jobpool_cout.outs("Job #%d will NOT be retried. " \
                                      "Data files will be deleted." % job['id'])
                    msg += "*** Raw data files will be deleted. ***\n"
                else:
                    jobpool_cout.outs("Job #%d will NOT be retried. " % job['id'])
                mailer.ErrorMailer(msg).send()
            if config.basic.delete_rawdata:
                pipeline_utils.clean_up(job['id'])
            # Set status to 'terminal_failure'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='terminal_failure', " \
                             "updated_at='%s', " \
                             "details='Job has failed permanently' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
def submit(job_row):
    """
    Submits a job to QueueManager, if successful will store returned queue id.

    Input:
        job_row: A row from the jobs table. The datafiles associated
            with this job will be submitted to be processed.
    Outputs:
        None
    """
    fns = pipeline_utils.get_fns_for_jobid(job_row['id'])
    try:
        # Verify the data files before spending queue time on them.
        presubmission_check(fns)
        outdir = get_output_dir(fns)
        # Attempt to submit the job
        queue_id = config.jobpooler.queue_manager.submit\
                        (fns, outdir, job_row['id'])
    except (FailedPreCheckError):
        # Error caught during presubmission check.
        # Terminal: record a 'precheck_failed' submit attempt and mark the
        # job itself 'terminal_failure'; it will never be re-submitted.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Job ID: %d " % job_row['id']
        errormsg += "failed presubmission check!\n\n"
        errormsg += "".join(exceptionmsgs)
        jobpool_cout.outs("Job ID: %d failed presubmission check!\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1]))
        if config.email.send_on_terminal_failures:
            # Send error email
            msg = "Presubmission check failed!\n"
            msg += "Job ID: %d\n\n" % \
                   (job_row['id'])
            msg += errormsg
            msg += "\n*** Job has been terminally failed. ***\n"
            msg += "*** Job will NOT be re-submitted! ***\n"
            if config.basic.delete_rawdata:
                jobpool_cout.outs("Job #%d will NOT be retried. " \
                                  "Data files will be deleted." % job_row['id'])
                msg += "*** Raw data files will be deleted. ***\n"
            else:
                jobpool_cout.outs("Job #%d will NOT be retried. " % job_row['id'])
            notification = mailer.ErrorMailer(msg, \
                                subject="Job failed presubmission check - Terminal")
            notification.send()
        if config.basic.delete_rawdata:
            pipeline_utils.clean_up(job_row['id'])
        # Parameterized ('?') queries: the traceback text in 'errormsg'
        # may contain quotes, so it must not be interpolated into the SQL.
        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                       "job_id, " \
                       "status, " \
                       "created_at, " \
                       "updated_at, " \
                       "details) " \
                       "VALUES (?, ?, ?, ?, ?)" )
        arglist.append( ( job_row['id'], 'precheck_failed', \
                          jobtracker.nowstr(), jobtracker.nowstr(), \
                          errormsg) )
        queries.append("UPDATE jobs " \
                       "SET status='terminal_failure', " \
                       "details='Failed presubmission check', " \
                       "updated_at=? " \
                       "WHERE id=?" )
        arglist.append( (jobtracker.nowstr(), job_row['id']) )
        jobtracker.execute(queries, arglist)
    except (queue_managers.QueueManagerJobFatalError,\
            datafile.DataFileError):
        # Error caught during job submission.
        # Non-terminal: the job is marked 'failed' so it may be retried
        # later (see recover_failed_jobs).
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while submitting job!\n"
        errormsg += "\tJob ID: %d\n\n" % job_row['id']
        errormsg += "".join(exceptionmsgs)
        jobpool_cout.outs("Error while submitting job!\n" \
                          "\tJob ID: %d\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1]))
        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                       "job_id, " \
                       "status, " \
                       "created_at, " \
                       "updated_at, " \
                       "details) " \
                       "VALUES (?, ?, ?, ?, ?)" )
        arglist.append( ( job_row['id'], 'submission_failed', \
                          jobtracker.nowstr(), jobtracker.nowstr(), \
                          errormsg) )
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                       "details='Error while submitting job', " \
                       "updated_at=? " \
                       "WHERE id=?" )
        arglist.append( (jobtracker.nowstr(), job_row['id']) )
        jobtracker.execute(queries, arglist)
    except queue_managers.QueueManagerNonFatalError:
        # Do nothing. Don't submit the job. Don't mark the job as 'submitted'.
        # Don't mark the job as 'failed'. The job submission will be retried.
        pass
    except queue_managers.QueueManagerFatalError:
        # A fatal error occurred. Re-raise!
        raise
    else:
        # No error occurred
        # Record the queue submission and flip the job to 'submitted'.
        msg = "Submitted job to process:\n"
        msg += "\tJob ID: %d, Queue ID: %s\n" % (job_row['id'], queue_id)
        msg += "\tData file(s):\n"
        for fn in fns:
            msg += "\t%s\n" % fn
        jobpool_cout.outs(msg)
        queries = []
        queries.append("INSERT INTO job_submits (" \
                       "job_id, " \
                       "queue_id, " \
                       "output_dir, " \
                       "status, " \
                       "created_at, " \
                       "updated_at, " \
                       "details) " \
                       "VALUES (%d,'%s','%s','%s','%s','%s','%s')" % \
                       (job_row['id'], queue_id, outdir, 'running', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        'Job submitted to queue'))
        queries.append("UPDATE jobs " \
                       "SET status='submitted', " \
                       "details='Job submitted to queue', " \
                       "updated_at='%s' " \
                       "WHERE id=%d" % \
                       (jobtracker.nowstr(), job_row['id']))
        jobtracker.query(queries)
def submit(job_row):
    """
    Submits a job to QueueManager, if successful will store returned queue id.

    Input:
        job_row: A row from the jobs table. The datafiles associated
            with this job will be submitted to be processed.
    Outputs:
        None
    """
    fns = pipeline_utils.get_fns_for_jobid(job_row['id'])
    try:
        # Verify the data files before spending queue time on them.
        presubmission_check(fns)
        outdir = get_output_dir(fns)
        # Attempt to submit the job
        queue_id = config.jobpooler.queue_manager.submit\
                        (fns, outdir, job_row['id'])
    except (FailedPreCheckError):
        # Error caught during presubmission check.
        # Terminal: record a 'precheck_failed' submit attempt and mark the
        # job itself 'terminal_failure'; it will never be re-submitted.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Job ID: %d " % job_row['id']
        errormsg += "failed presubmission check!\n\n"
        errormsg += "".join(exceptionmsgs)
        jobpool_cout.outs("Job ID: %d failed presubmission check!\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1]))
        if config.email.send_on_terminal_failures:
            # Send error email
            msg = "Presubmission check failed!\n"
            msg += "Job ID: %d\n\n" % \
                   (job_row['id'])
            msg += errormsg
            msg += "\n*** Job has been terminally failed. ***\n"
            msg += "*** Job will NOT be re-submitted! ***\n"
            if config.basic.delete_rawdata:
                jobpool_cout.outs("Job #%d will NOT be retried. " \
                                  "Data files will be deleted." % job_row['id'])
                msg += "*** Raw data files will be deleted. ***\n"
            else:
                jobpool_cout.outs("Job #%d will NOT be retried. " % job_row['id'])
            notification = mailer.ErrorMailer(msg, \
                                subject="Job failed presubmission check - Terminal")
            notification.send()
        if config.basic.delete_rawdata:
            pipeline_utils.clean_up(job_row['id'])
        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                       "job_id, " \
                       "status, " \
                       "created_at, " \
                       "updated_at, " \
                       "details) " \
                       "VALUES (?, ?, ?, ?, ?)" )
        arglist.append((job_row['id'], 'precheck_failed', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        errormsg))
        queries.append("UPDATE jobs " \
                       "SET status='terminal_failure', " \
                       "details='Failed presubmission check', " \
                       "updated_at=? " \
                       "WHERE id=?" )
        arglist.append((jobtracker.nowstr(), job_row['id']))
        jobtracker.execute(queries, arglist)
    except (queue_managers.QueueManagerJobFatalError,\
            datafile.DataFileError):
        # Error caught during job submission.
        # Non-terminal: the job is marked 'failed' so it may be retried
        # later (see recover_failed_jobs).
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while submitting job!\n"
        errormsg += "\tJob ID: %d\n\n" % job_row['id']
        errormsg += "".join(exceptionmsgs)
        jobpool_cout.outs("Error while submitting job!\n" \
                          "\tJob ID: %d\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1]))
        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                       "job_id, " \
                       "status, " \
                       "created_at, " \
                       "updated_at, " \
                       "details) " \
                       "VALUES (?, ?, ?, ?, ?)" )
        arglist.append((job_row['id'], 'submission_failed', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        errormsg))
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                       "details='Error while submitting job', " \
                       "updated_at=? " \
                       "WHERE id=?" )
        arglist.append((jobtracker.nowstr(), job_row['id']))
        jobtracker.execute(queries, arglist)
    except queue_managers.QueueManagerNonFatalError:
        # Do nothing. Don't submit the job. Don't mark the job as 'submitted'.
        # Don't mark the job as 'failed'. The job submission will be retried.
        pass
    except queue_managers.QueueManagerFatalError:
        # A fatal error occurred. Re-raise!
        raise
    else:
        # No error occurred
        # Record the queue submission and flip the job to 'submitted'.
        msg = "Submitted job to process:\n"
        msg += "\tJob ID: %d, Queue ID: %s\n" % (job_row['id'], queue_id)
        msg += "\tData file(s):\n"
        for fn in fns:
            msg += "\t%s\n" % fn
        jobpool_cout.outs(msg)
        # CHANGED: use parameterized queries here, exactly as the failure
        # branches above already do, instead of interpolating values into
        # the SQL. This keeps quoting consistent and makes values such as
        # queue_id/outdir safe if they ever contain quote characters.
        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                       "job_id, " \
                       "queue_id, " \
                       "output_dir, " \
                       "status, " \
                       "created_at, " \
                       "updated_at, " \
                       "details) " \
                       "VALUES (?, ?, ?, ?, ?, ?, ?)" )
        arglist.append((job_row['id'], queue_id, outdir, 'running', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        'Job submitted to queue'))
        queries.append("UPDATE jobs " \
                       "SET status='submitted', " \
                       "details='Job submitted to queue', " \
                       "updated_at=? " \
                       "WHERE id=?" )
        arglist.append((jobtracker.nowstr(), job_row['id']))
        jobtracker.execute(queries, arglist)
def submit(job_row):
    """
    Submits a job to QueueManager, if successful will store returned queue id.

    Input:
        job_row: A row from the jobs table. The datafiles associated
            with this job will be submitted to be processed.

    Outputs:
        None

    Raises:
        ValueError: if job_row["task"] does not match any known task.
    """
    fns = pipeline_utils.get_fns_for_jobid(job_row["id"])

    script = os.path.join(config.basic.pipelinedir, "bin", "%s_search.py" % config.basic.survey)

    # Specify requested resources for job submission.
    # NOTE(review): the entries appear to be [walltime, memory, cores]
    # judging by the inline time comments -- confirm against the queue
    # manager's 'resources' parameter.
    if job_row["task"] == "rfifind":
        res = [4 * 60 * 60, 1024, 25]
    elif "search" in job_row["task"]:
        res = [165240, 1024, 28]  # 45.9 hrs
    elif job_row["task"] == "sifting":
        # Sifting should be quick
        res = [30 * 60, 256, 5]
    elif "folding" in job_row["task"]:
        res = [96 * 60 * 60, 3000, 28]
    # elif job_row['task']=='tidyup':
    #     res = [30*60, 256, 5]
    else:
        # BUGFIX: an unrecognized task previously fell through with 'res'
        # unbound, crashing below with NameError; fail explicitly instead.
        raise ValueError("Unrecognized task for job %d: %s" % (job_row["id"], job_row["task"]))
    options = job_row["task"]

    try:
        SPAN512_job.presubmission_check(fns)
        outdir = SPAN512_job.get_output_dir(fns)
        # Attempt to submit the job
        queue_id = config.jobpooler.queue_manager.submit(
            fns, outdir, job_row["id"], resources=res, script=script, opts=options
        )
    except (FailedPreCheckError):
        # Error caught during presubmission check. The job is terminally
        # failed and will not be re-submitted.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Job ID: %d " % job_row["id"]
        errormsg += "failed presubmission check!\n\n"
        errormsg += "".join(exceptionmsgs)
        jobpool_cout.outs("Job ID: %d failed presubmission check!\n\t%s\n" % (job_row["id"], exceptionmsgs[-1]))

        if config.email.send_on_terminal_failures:
            # Send error email
            msg = "Presubmission check failed!\n"
            msg += "Job ID: %d\n\n" % (job_row["id"])
            msg += errormsg
            msg += "\n*** Job has been terminally failed. ***\n"
            msg += "*** Job will NOT be re-submitted! ***\n"
            if config.basic.delete_rawdata:
                jobpool_cout.outs("Job #%d will NOT be retried. " "Data files will be deleted." % job_row["id"])
                msg += "*** Raw data files will be deleted. ***\n"
            else:
                jobpool_cout.outs("Job #%d will NOT be retried. " % job_row["id"])
            notification = mailer.ErrorMailer(msg, subject="Job failed presubmission check - Terminal")
            notification.send()

        if config.basic.delete_rawdata:
            pipeline_utils.clean_up(job_row["id"])

        # BUGFIX: the string values in this INSERT were unquoted (%s instead
        # of '%s'), producing invalid SQL. Single quotes in the traceback
        # text are doubled so they cannot corrupt the statement.
        details = errormsg.replace("'", "''")
        queries = []
        queries.append(
            "INSERT INTO job_submits ("
            "job_id, "
            "status, "
            "created_at, "
            "updated_at, "
            "details) "
            "VALUES (%d, '%s', '%s', '%s', '%s')"
            % (job_row["id"], "precheck_failed", jobtracker.nowstr(), jobtracker.nowstr(), details)
        )
        # BUGFIX: a space was missing between updated_at='%s' and WHERE,
        # producing "updated_at='...'WHERE id=...".
        queries.append(
            "UPDATE jobs "
            "SET status='terminal_failure', "
            "details='Failed presubmission check', "
            "updated_at='%s' "
            "WHERE id=%d" % (jobtracker.nowstr(), job_row["id"])
        )
        jobtracker.query(queries)
    except (queue_managers.QueueManagerJobFatalError, datafile.DataFileError):
        # Error caught during job submission. The job is marked 'failed'
        # (not terminal) so it can be retried later.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while submitting job!\n"
        errormsg += "\tJob ID: %d\n\n" % job_row["id"]
        errormsg += "".join(exceptionmsgs)
        jobpool_cout.outs("Error while submitting job!\n" "\tJob ID: %d\n\t%s\n" % (job_row["id"], exceptionmsgs[-1]))

        # BUGFIX: same quoting fixes as the precheck branch. Also this
        # branch previously called jobtracker.execute(queries) with no
        # argument list; every other branch in this function uses
        # jobtracker.query(queries) for interpolated statements.
        details = errormsg.replace("'", "''")
        queries = []
        queries.append(
            "INSERT INTO job_submits ("
            "job_id, "
            "status, "
            "created_at, "
            "updated_at, "
            "details) "
            "VALUES (%d, '%s', '%s', '%s', '%s')"
            % (job_row["id"], "submission_failed", jobtracker.nowstr(), jobtracker.nowstr(), details)
        )
        queries.append(
            "UPDATE jobs "
            "SET status='failed', "
            "details='Error while submitting job', "
            "updated_at='%s' "
            "WHERE id=%d" % (jobtracker.nowstr(), job_row["id"])
        )
        jobtracker.query(queries)
    except queue_managers.QueueManagerNonFatalError:
        # Do nothing. Don't submit the job. Don't mark the job as 'submitted'.
        # Don't mark the job as 'failed'. The job submission will be retried.
        pass
    except queue_managers.QueueManagerFatalError:
        # A fatal error occurred. Re-raise!
        raise
    else:
        # No error occurred: record the queue submission and mark the job
        # as 'submitted'.
        msg = "Submitted job to process:\n"
        msg += "\tJob ID: %d, Queue ID: %s\n" % (job_row["id"], queue_id)
        msg += "\tData file(s):\n"
        for fn in fns:
            msg += "\t%s\n" % fn
        jobpool_cout.outs(msg)
        queries = []
        queries.append(
            "INSERT INTO job_submits ("
            "job_id, "
            "queue_id, "
            "output_dir, "
            "status, "
            "created_at, "
            "updated_at, "
            "details) "
            "VALUES (%d,'%s','%s','%s','%s','%s','%s')"
            % (
                job_row["id"],
                queue_id,
                outdir,
                "running",
                jobtracker.nowstr(),
                jobtracker.nowstr(),
                "Job submitted to queue",
            )
        )
        queries.append(
            "UPDATE jobs "
            "SET status='submitted', "
            "details='Job submitted to queue', "
            "updated_at='%s' "
            "WHERE id=%d" % (jobtracker.nowstr(), job_row["id"])
        )
        jobtracker.query(queries)
def submit(job_row): """ Submits a job to QueueManager, if successful will store returned queue id. Input: job_row: A row from the jobs table. The datafiles associated with this job will be submitted to be processed. Outputs: None """ fns = pipeline_utils.get_fns_for_jobid(job_row['id']) bad_days = ['20170414', '20170419', '20170420', '20170423', '20170423', '20170427', '20170429', '20170503', '20170510', '20170516'] bad_beams = ['b5', 'b6'] for bad_day in bad_days: if bad_day in fns[0]: if (bad_beams[0] in fns[0]) or (bad_beams[1] in fns[0]): print "Files affected by the bad beams 5, 6 60Hz signal: ", fns print "Will delete the raw data files." queries=[] arglist=[] queries.append("INSERT INTO job_submits (" \ "job_id, " \ "status, " \ "created_at, " \ "updated_at, " \ "details) " \ "VALUES (?, ?, ?, ?, ?)" ) arglist.append( ( job_row['id'], 'Beams 5 and 6', \ jobtracker.nowstr(), jobtracker.nowstr(), \ "Affected by 60Hz signal") ) queries.append("UPDATE jobs " \ "SET status='terminal_failure', " \ "details='Beams 5 and 6 affected by 60Hz signal', " \ "updated_at=? " \ "WHERE id=?" ) arglist.append( (jobtracker.nowstr(), job_row['id']) ) jobtracker.execute(queries, arglist) return try: presubmission_check(fns) outdir = get_output_dir(fns) if outdir=="": pass # Attempt to submit the job if job_row['status'] == 'retrying': ## ppn=2 ppn=1 else: ppn=1 if config.jobpooler.alternative_submit_script: print "Submitting:", config.jobpooler.alternative_submit_script queue_id = config.jobpooler.queue_manager.submit\ (fns, outdir, job_row['id'],\ script=config.jobpooler.alternative_submit_script,\ ppn=ppn) else: queue_id = config.jobpooler.queue_manager.submit\ (fns, outdir, job_row['id'], ppn=ppn) except (FailedPreCheckError): # Error caught during presubmission check. 
exceptionmsgs = traceback.format_exception(*sys.exc_info()) errormsg = "Job ID: %d " % job_row['id'] errormsg += "failed presubmission check!\n\n" errormsg += "".join(exceptionmsgs) jobpool_cout.outs("Job ID: %d failed presubmission check!\n\t%s\n" % \ (job_row['id'], exceptionmsgs[-1])) if config.basic.delete_rawdata: pipeline_utils.clean_up(job_row['id']) queries = [] arglist = [] queries.append("INSERT INTO job_submits (" \ "job_id, " \ "status, " \ "created_at, " \ "updated_at, " \ "details) " \ "VALUES (?, ?, ?, ?, ?)" ) arglist.append( ( job_row['id'], 'precheck_failed', \ jobtracker.nowstr(), jobtracker.nowstr(), \ errormsg) ) queries.append("UPDATE jobs " \ "SET status='terminal_failure', " \ "details='Failed presubmission check', " \ "updated_at=? " \ "WHERE id=?" ) arglist.append( (jobtracker.nowstr(), job_row['id']) ) jobtracker.execute(queries, arglist) except (queue_managers.QueueManagerJobFatalError,\ datafile.DataFileError): # Error caught during job submission. exceptionmsgs = traceback.format_exception(*sys.exc_info()) errormsg = "Error while submitting job!\n" errormsg += "\tJob ID: %d\n\n" % job_row['id'] errormsg += "".join(exceptionmsgs) jobpool_cout.outs("Error while submitting job!\n" \ "\tJob ID: %d\n\t%s\n" % \ (job_row['id'], exceptionmsgs[-1])) queries = [] arglist = [] queries.append("INSERT INTO job_submits (" \ "job_id, " \ "status, " \ "created_at, " \ "updated_at, " \ "details) " \ "VALUES (?, ?, ?, ?, ?)" ) arglist.append( ( job_row['id'], 'submission_failed', \ jobtracker.nowstr(), jobtracker.nowstr(), \ errormsg) ) queries.append("UPDATE jobs " \ "SET status='failed', " \ "details='Error while submitting job', " \ "updated_at=? " \ "WHERE id=?" ) arglist.append( (jobtracker.nowstr(), job_row['id']) ) jobtracker.execute(queries, arglist) except queue_managers.QueueManagerNonFatalError: # Do nothing. Don't submit the job. Don't mark the job as 'submitted'. # Don't mark the job as 'failed'. The job submission will be retried. 
pass except queue_managers.QueueManagerFatalError: # A fatal error occurred. Re-raise! raise except (MissingFilesError): # Unexpected error exceptionmsgs = traceback.format_exception(*sys.exc_info()) errormsg = "Job ID: %d " % job_row['id'] errormsg += "Raw data files missing from /scratch/ area.!\n\n" errormsg += "".join(exceptionmsgs) queries = [] arglist = [] queries.append("INSERT INTO job_submits (" \ "job_id, " \ "status, " \ "created_at, " \ "updated_at, " \ "details) " \ "VALUES (?, ?, ?, ?, ?)" ) arglist.append( ( job_row['id'], 'submission_failed', \ jobtracker.nowstr(), jobtracker.nowstr(), \ errormsg) ) queries.append("UPDATE jobs " \ "SET status='failed', " \ "details='Error while submitting job', " \ "updated_at=? " \ "WHERE id=?" ) arglist.append( (jobtracker.nowstr(), job_row['id']) ) jobtracker.execute(queries, arglist) print errormsg else: # No error occurred msg = "Submitted job to process:\n" msg += "\tJob ID: %d, Queue ID: %s\n" % (job_row['id'], queue_id) msg += "\tData file(s):\n" for fn in fns: msg += "\t%s\n" % fn jobpool_cout.outs(msg) queries = [] queries.append("INSERT INTO job_submits (" \ "job_id, " \ "queue_id, " \ "output_dir, " \ "status, " \ "created_at, " \ "updated_at, " \ "details) " \ "VALUES (%d,'%s','%s','%s','%s','%s','%s')" % \ (job_row['id'], queue_id, outdir, 'running', \ jobtracker.nowstr(), jobtracker.nowstr(), \ 'Job submitted to queue')) queries.append("UPDATE jobs " \ "SET status='submitted', " \ "details='Job submitted to queue', " \ "updated_at='%s' " \ "WHERE id=%d" % \ (jobtracker.nowstr(), job_row['id'])) jobtracker.query(queries)