Example #1
def main():
    jobids = set([int(id) for id in args])
    jobids.update(options.jobids)

    for fn in options.files:
        rows = jobtracker.query("SELECT job_files.job_id FROM job_files " \
                               "LEFT JOIN files " \
                                    "ON files.id = job_files.file_id " \
                               "WHERE files.filename LIKE '%%%s' " % fn)
        for row in rows:
            jobids.add(row['job_id'])
    print "Number of jobs to kill: %d" % len(jobids)
    for jobid in jobids:
        print "Attempting to kill job with id %d" % jobid
        row = jobtracker.query("SELECT status FROM jobs " \
                                "WHERE id=%d" % jobid, \
                                fetchone=True)
        if row['status'] in ['new', 'retrying']:
            jobtracker.query("UPDATE jobs " \
                             "SET status='terminal_failure', " \
                                  "updated_at='%s', " \
                                  "details='Job was killed manually' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), jobid))
            print "Job's status has been set to 'terminal_failure'"
            pipeline_utils.clean_up(jobid)
        else:
            print "Only jobs whose status is 'waiting' or 'retrying' " \
                  "can be killed. (Current status of job %d: %s)" % \
                  (jobid, row['status'])
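Note on the queries above: the '%%%s' in the LIKE clause escapes a literal % through Python's string formatting, so the final SQL reads LIKE '%<filename>'. The UPDATE, however, interpolates values directly into the SQL string; the later examples in this listing instead pass ? placeholders with an argument list to jobtracker.execute(queries, arglist). A minimal sketch of the same status update in that placeholder style, assuming that same jobtracker.execute signature:

# A sketch of the same UPDATE in the placeholder style used by the
# later examples; the jobtracker.execute(queries, arglist) signature
# is assumed from those examples, not defined here.
queries = []
arglist = []
queries.append("UPDATE jobs " \
               "SET status='terminal_failure', " \
                    "updated_at=?, " \
                    "details='Job was killed manually' " \
               "WHERE id=?")
arglist.append((jobtracker.nowstr(), jobid))
jobtracker.execute(queries, arglist)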
Example #2
def recover_failed_jobs():
    """Gather jobs with status 'failed' from the job-tracker DB.
        For each of these jobs, check whether it can be re-submitted.
        If it can, set the status to 'retrying'. If the
        job cannot be re-submitted, set the status to 'terminal_failure',
        and delete the raw data (if config is set for deletion).

        Depending on configuration, emails may be sent.
    """
    failed_jobs = jobtracker.query("SELECT * FROM jobs " \
                                   "WHERE status='failed'")

    for job in failed_jobs:
        # Count the number of times this job has been submitted already
        submits = jobtracker.query("SELECT * FROM job_submits " \
                                   "WHERE job_id=%d " \
                                   "ORDER BY id DESC" % job['id'])

        if len(submits) < config.jobpooler.max_attempts:
            # We can re-submit this job.
            # Set status to 'retrying'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='retrying', " \
                                  "updated_at='%s', " \
                                  "details='Job will be retried' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
            jobpool_cout.outs("Job #%d will be retried." % job['id'])
        else:
            # We've run out of attempts for this job
            if config.basic.delete_rawdata:
                pipeline_utils.clean_up(job['id'])

            # Set status to 'terminal_failure'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='terminal_failure', " \
                                  "updated_at='%s', " \
                                  "details='Job has failed permanently' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
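The retry decision in recover_failed_jobs() reduces to comparing the number of prior submits against config.jobpooler.max_attempts. A self-contained sketch of that gate (should_retry is a hypothetical helper, not part of the pipeline):

def should_retry(num_submits, max_attempts):
    # Retry only while the job still has submission attempts left;
    # otherwise it is failed permanently.
    return num_submits < max_attempts

# With max_attempts=3, a job that has already been submitted three
# times is marked 'terminal_failure'.
for n in range(4):
    print "%d prior submits -> %s" % \
          (n, 'retrying' if should_retry(n, 3) else 'terminal_failure')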
Example #3
	    queries.append("UPDATE jobs " \
			   "SET status='uploaded', " \
				"details='Upload successful (header_id=%d)', " \
				"updated_at='%s' " \
			   "WHERE id=%d" % \
			   (header_id, jobtracker.nowstr(), job_submit['job_id']))
	    jobtracker.query(queries)

	    print "Results successfully uploaded"

	    # Tar and Copy final results to HPSS
	    print "Copy results to HPSS"
	    pipeline_utils.copy_results_to_HPSS(dir)

	    if config.basic.delete_rawdata:
		pipeline_utils.clean_up(job_submit['job_id'])

	    if debug.UPLOAD: 
		upload.upload_timing_summary['End-to-end'] = \
		    upload.upload_timing_summary.setdefault('End-to-end', 0) + \
		    (time.time()-starttime)
		print "Upload timing summary:"
		for k in sorted(upload.upload_timing_summary.keys()):
		    print "    %s: %.2f s" % (k, upload.upload_timing_summary[k])
	    print "" # Just a blank line


def get_fitsfiles(job_submit):
    """Find the fits files associated with this job.
        There should be a single file in the job's result
        directory.
Example #4
                            "details='Upload successful (header_id=%d)', " \
                            "updated_at='%s' " \
                       "WHERE id=%d" %
                       (header_id, jobtracker.nowstr(), job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='uploaded', " \
                            "details='Upload successful (header_id=%d)', " \
                            "updated_at='%s' " \
                       "WHERE id=%d" % \
                       (header_id, jobtracker.nowstr(), job_submit['job_id']))
        jobtracker.query(queries)

        print "Results successfully uploaded"

        if config.basic.delete_rawdata:
            pipeline_utils.clean_up(job_submit['job_id'])

        if debug.UPLOAD:
            upload.upload_timing_summary['End-to-end'] = \
                upload.upload_timing_summary.setdefault('End-to-end', 0) + \
                (time.time()-starttime)
            print "Upload timing summary:"
            for k in sorted(upload.upload_timing_summary.keys()):
                print "    %s: %.2f s" % (k, upload.upload_timing_summary[k])
        print ""  # Just a blank line


def get_fitsfiles(job_submit):
    """Find the fits files associated with this job.
        There should be a single file in the job's result
        directory.
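The 'End-to-end' bookkeeping above uses dict.setdefault to add each measured interval to a running per-stage total, creating the entry on first use. A standalone sketch of that accumulation pattern (the helper and stage name are illustrative only):

import time

upload_timing_summary = {}

def add_timing(summary, stage, started_at):
    # Add this stage's elapsed wall-clock time to its running total,
    # starting from 0 the first time the stage is seen.
    summary[stage] = summary.setdefault(stage, 0) + \
                     (time.time() - started_at)

starttime = time.time()
# ... upload work would happen here ...
add_timing(upload_timing_summary, 'End-to-end', starttime)
for k in sorted(upload_timing_summary.keys()):
    print "    %s: %.2f s" % (k, upload_timing_summary[k])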
Example #5
File: job.py  Project: sn1p3/pipeline2.0
def recover_failed_jobs():
    """Gather jobs with status 'failed' from the job-tracker DB.
        For each of these jobs, check whether it can be re-submitted.
        If it can, set the status to 'retrying'. If the
        job cannot be re-submitted, set the status to 'terminal_failure',
        and delete the raw data (if config is set for deletion).

        Depending on configuration, emails may be sent.
    """
    failed_jobs = jobtracker.query("SELECT * FROM jobs " \
                                   "WHERE status='failed'")

    for job in failed_jobs:
        # Count the number of times this job has been submitted already
        submits = jobtracker.query("SELECT * FROM job_submits " \
                                   "WHERE job_id=%d " \
                                   "ORDER BY id DESC" % job['id'])
        if len(submits) < config.jobpooler.max_attempts:
            # We can re-submit this job. 
            if config.email.send_on_failures:
                # Send error email
                msg  = "Error! Job submit status: %s\n" % \
                            submits[0]['status']
                msg += "Job ID: %d, Job submit ID: %d\n\n" % \
                        (job['id'], submits[0]['id'])
                msg += str(submits[0]['details'])
                msg += "\n*** Job will be re-submitted to the queue ***\n"
                mailer.ErrorMailer(msg).send()

            # Set status to 'retrying'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='retrying', " \
                                  "updated_at='%s', " \
                                  "details='Job will be retried' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
            jobpool_cout.outs("Job #%d will be retried." % job['id'])
        else:
            # We've run out of attempts for this job
            if config.email.send_on_terminal_failures or \
                    config.email.send_on_failures:
                # Send error email
                msg  = "Error! Job submit status: %s\n" % \
                            str(submits[0]['status'])
                msg += "Job ID: %d, Job submit ID: %d\n\n" % \
                        (job['id'], submits[0]['id'])
                msg += str(submits[0]['details'])
                msg += "\n*** No more attempts for this job. ***\n"
                msg += "*** Job will NOT be re-submitted! ***\n"
                if config.basic.delete_rawdata:
                    jobpool_cout.outs("Job #%d will NOT be retried. " \
                                        "Data files will be deleted." % job['id'])
                    msg += "*** Raw data files will be deleted. ***\n"
                else:
                    jobpool_cout.outs("Job #%d will NOT be retried. " % job['id'])
                mailer.ErrorMailer(msg).send()

            if config.basic.delete_rawdata:
                pipeline_utils.clean_up(job['id'])

            # Set status to 'terminal_failure'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='terminal_failure', " \
                                  "updated_at='%s', " \
                                  "details='Job has failed permanently' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
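The two email branches above build nearly the same message; only the closing lines differ. A sketch of the shared construction as a single helper (build_failure_msg is hypothetical, not a pipeline function):

def build_failure_msg(job_id, submit, will_retry, delete_rawdata):
    # Compose the error-email body used by both branches above.
    msg  = "Error! Job submit status: %s\n" % str(submit['status'])
    msg += "Job ID: %d, Job submit ID: %d\n\n" % (job_id, submit['id'])
    msg += str(submit['details'])
    if will_retry:
        msg += "\n*** Job will be re-submitted to the queue ***\n"
    else:
        msg += "\n*** No more attempts for this job. ***\n"
        msg += "*** Job will NOT be re-submitted! ***\n"
        if delete_rawdata:
            msg += "*** Raw data files will be deleted. ***\n"
    return msg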
Example #6
def submit(job_row):
    """
    Submits a job to the QueueManager; if successful, stores the returned queue id.

    Input:
        job_row: A row from the jobs table. The datafiles associated
            with this job will be submitted to be processed.
    Outputs:
        None
    """
    fns = pipeline_utils.get_fns_for_jobid(job_row['id']) 
    
    try:
        presubmission_check(fns)
        outdir = get_output_dir(fns)
        # Attempt to submit the job
        queue_id = config.jobpooler.queue_manager.submit\
                            (fns, outdir, job_row['id'])
    except (FailedPreCheckError):
        # Error caught during presubmission check.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Job ID: %d " % job_row['id']
        errormsg += "failed presubmission check!\n\n"
        errormsg += "".join(exceptionmsgs)

        jobpool_cout.outs("Job ID: %d failed presubmission check!\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1])) 
        
        if config.email.send_on_terminal_failures:
            # Send error email
            msg  = "Presubmission check failed!\n"
            msg += "Job ID: %d\n\n" % \
                    (job_row['id'])
            msg += errormsg
            msg += "\n*** Job has been terminally failed. ***\n"
            msg += "*** Job will NOT be re-submitted! ***\n"
            if config.basic.delete_rawdata:
                jobpool_cout.outs("Job #%d will NOT be retried. " \
                                    "Data files will be deleted." % job_row['id'])
                msg += "*** Raw data files will be deleted. ***\n"
            else:
                jobpool_cout.outs("Job #%d will NOT be retried. " % job_row['id'])
            notification = mailer.ErrorMailer(msg, \
                            subject="Job failed presubmission check - Terminal")
            notification.send()

        if config.basic.delete_rawdata:
            pipeline_utils.clean_up(job_row['id'])

        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                            "job_id, " \
                            "status, " \
                            "created_at, " \
                            "updated_at, " \
                            "details) " \
                      "VALUES (?, ?, ?, ?, ?)" )
        arglist.append( ( job_row['id'], 'precheck_failed', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        errormsg) )
        queries.append("UPDATE jobs " \
                       "SET status='terminal_failure', " \
                            "details='Failed presubmission check', " \
                            "updated_at=? " \
                       "WHERE id=?" )
        arglist.append( (jobtracker.nowstr(), job_row['id']) )
        jobtracker.execute(queries, arglist)
    except (queue_managers.QueueManagerJobFatalError,\
              datafile.DataFileError):
        # Error caught during job submission.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg  = "Error while submitting job!\n"
        errormsg += "\tJob ID: %d\n\n" % job_row['id']
        errormsg += "".join(exceptionmsgs)

        jobpool_cout.outs("Error while submitting job!\n" \
                          "\tJob ID: %d\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1])) 
        
        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                            "job_id, " \
                            "status, " \
                            "created_at, " \
                            "updated_at, " \
                            "details) " \
                      "VALUES (?, ?, ?, ?, ?)" )
        arglist.append( ( job_row['id'], 'submission_failed', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        errormsg) )
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                            "details='Error while submitting job', " \
                            "updated_at=? " \
                       "WHERE id=?" )
        arglist.append( (jobtracker.nowstr(), job_row['id']) )
        jobtracker.execute(queries, arglist)
    except queue_managers.QueueManagerNonFatalError:
        # Do nothing. Don't submit the job. Don't mark the job as 'submitted'.
        # Don't mark the job as 'failed'. The job submission will be retried.
        pass
    except queue_managers.QueueManagerFatalError:
        # A fatal error occurred. Re-raise!
        raise
    else: 
        # No error occurred
        msg  = "Submitted job to process:\n" 
        msg += "\tJob ID: %d, Queue ID: %s\n" % (job_row['id'], queue_id) 
        msg += "\tData file(s):\n" 
        for fn in fns:
            msg += "\t%s\n" % fn
        jobpool_cout.outs(msg)
        queries = []
        queries.append("INSERT INTO job_submits (" \
                            "job_id, " \
                            "queue_id, " \
                            "output_dir, " \
                            "status, " \
                            "created_at, " \
                            "updated_at, " \
                            "details) " \
                      "VALUES (%d,'%s','%s','%s','%s','%s','%s')" % \
                      (job_row['id'], queue_id, outdir, 'running', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        'Job submitted to queue'))
        queries.append("UPDATE jobs " \
                       "SET status='submitted', " \
                            "details='Job submitted to queue', " \
                            "updated_at='%s' " \
                       "WHERE id=%d" % \
                    (jobtracker.nowstr(), job_row['id']))
        jobtracker.query(queries)
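Each except branch in submit() maps an exception class to a pair of statuses, one recorded in job_submits and one in jobs. A summary of that mapping as data (for reference only; this table is not part of the pipeline):

# exception caught -> (job_submits.status, jobs.status)
SUBMIT_ERROR_STATUS = {
    'FailedPreCheckError':       ('precheck_failed', 'terminal_failure'),
    'QueueManagerJobFatalError': ('submission_failed', 'failed'),
    'DataFileError':             ('submission_failed', 'failed'),
    # QueueManagerNonFatalError: nothing is recorded; submission is
    # simply retried on a later pass.
    # QueueManagerFatalError: re-raised to the caller instead.
}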
Example #7
File: job.py  Project: kstovall/pipeline2.0
def recover_failed_jobs():
    """Gather jobs with status 'failed' from the job-tracker DB.
        For each of these jobs, check whether it can be re-submitted.
        If it can, set the status to 'retrying'. If the
        job cannot be re-submitted, set the status to 'terminal_failure',
        and delete the raw data (if config is set for deletion).

        Depending on configuration, emails may be sent.
    """
    failed_jobs = jobtracker.query("SELECT * FROM jobs " \
                                   "WHERE status='failed'")

    for job in failed_jobs:
        # Count the number of times this job has been submitted already
        submits = jobtracker.query("SELECT * FROM job_submits " \
                                   "WHERE job_id=%d " \
                                   "ORDER BY id DESC" % job['id'])
        if len(submits) < config.jobpooler.max_attempts:
            # We can re-submit this job.
            if config.email.send_on_failures:
                # Send error email
                msg  = "Error! Job submit status: %s\n" % \
                            submits[0]['status']
                msg += "Job ID: %d, Job submit ID: %d\n\n" % \
                        (job['id'], submits[0]['id'])
                msg += str(submits[0]['details'])
                msg += "\n*** Job will be re-submitted to the queue ***\n"
                mailer.ErrorMailer(msg).send()

            # Set status to 'retrying'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='retrying', " \
                                  "updated_at='%s', " \
                                  "details='Job will be retried' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
            jobpool_cout.outs("Job #%d will be retried." % job['id'])
        else:
            # We've run out of attempts for this job
            if config.email.send_on_terminal_failures or \
                    config.email.send_on_failures:
                # Send error email
                msg  = "Error! Job submit status: %s\n" % \
                            str(submits[0]['status'])
                msg += "Job ID: %d, Job submit ID: %d\n\n" % \
                        (job['id'], submits[0]['id'])
                msg += str(submits[0]['details'])
                msg += "\n*** No more attempts for this job. ***\n"
                msg += "*** Job will NOT be re-submitted! ***\n"
                if config.basic.delete_rawdata:
                    jobpool_cout.outs("Job #%d will NOT be retried. " \
                                        "Data files will be deleted." % job['id'])
                    msg += "*** Raw data files will be deleted. ***\n"
                else:
                    jobpool_cout.outs("Job #%d will NOT be retried. " %
                                      job['id'])
                mailer.ErrorMailer(msg).send()

            if config.basic.delete_rawdata:
                pipeline_utils.clean_up(job['id'])

            # Set status to 'terminal_failure'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='terminal_failure', " \
                                  "updated_at='%s', " \
                                  "details='Job has failed permanently' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
Example #8
def submit(job_row):
    """
    Submits a job to the QueueManager; if successful, stores the returned queue id.

    Input:
        job_row: A row from the jobs table. The datafiles associated
            with this job will be submitted to be processed.
    Outputs:
        None
    """
    fns = pipeline_utils.get_fns_for_jobid(job_row["id"])

    script = os.path.join(config.basic.pipelinedir, "bin", "%s_search.py" % config.basic.survey)

    # Specify requested resources for job submission
    if job_row["task"] == "rfifind":
        res = [4 * 60 * 60, 1024, 25]
    elif "search" in job_row["task"]:
        res = [165240, 1024, 28]  # 45.9 hrs
    elif job_row["task"] == "sifting":  # Sifting should be quick
        res = [30 * 60, 256, 5]
    elif "folding" in job_row["task"]:
        res = [96 * 60 * 60, 3000, 28]
    # elif job_row['task']=='tidyup':
    #    res = [30*60, 256, 5]
    options = job_row["task"]

    try:
        SPAN512_job.presubmission_check(fns)
        outdir = SPAN512_job.get_output_dir(fns)
        # Attempt to submit the job
        queue_id = config.jobpooler.queue_manager.submit(
            fns, outdir, job_row["id"], resources=res, script=script, opts=options
        )
    except (FailedPreCheckError):
        # Error caught during presubmission check.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Job ID: %d " % job_row["id"]
        errormsg += "failed presubmission check!\n\n"
        errormsg += "".join(exceptionmsgs)

        jobpool_cout.outs("Job ID: %d failed presubmission check!\n\t%s\n" % (job_row["id"], exceptionmsgs[-1]))

        if config.email.send_on_terminal_failures:
            # Send error email
            msg = "Presubmission check failed!\n"
            msg += "Job ID: %d\n\n" % (job_row["id"])
            msg += errormsg
            msg += "\n*** Job has been terminally failed. ***\n"
            msg += "*** Job will NOT be re-submitted! ***\n"
            if config.basic.delete_rawdata:
                jobpool_cout.outs("Job #%d will NOT be retried. " "Data files will be deleted." % job_row["id"])
                msg += "*** Raw data files will be deleted. ***\n"
            else:
                jobpool_cout.outs("Job #%d will NOT be retried. " % job_row["id"])
            notification = mailer.ErrorMailer(msg, subject="Job failed presubmission check - Terminal")
            notification.send()

        if config.basic.delete_rawdata:
            pipeline_utils.clean_up(job_row["id"])

        queries = []
        arglist = []
        queries.append(
            "INSERT INTO job_submits ("
            "job_id, "
            "status, "
            "created_at, "
            "updated_at, "
            "details) "
            "VALUES (%d, %s, '%s', '%s', %s)"
            % (job_row["id"], "precheck_failed", jobtracker.nowstr(), jobtracker.nowstr(), errormsg)
        )
        queries.append(
            "UPDATE jobs "
            "SET status='terminal_failure', "
            "details='Failed presubmission check', "
            "updated_at='%s'"
            "WHERE id=%d" % (jobtracker.nowstr(), job_row["id"])
        )
        jobtracker.query(queries)

    except (queue_managers.QueueManagerJobFatalError, datafile.DataFileError):
        # Error caught during job submission.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while submitting job!\n"
        errormsg += "\tJob ID: %d\n\n" % job_row["id"]
        errormsg += "".join(exceptionmsgs)

        jobpool_cout.outs("Error while submitting job!\n" "\tJob ID: %d\n\t%s\n" % (job_row["id"], exceptionmsgs[-1]))

        queries = []
        arglist = []
        queries.append(
            "INSERT INTO job_submits ("
            "job_id, "
            "status, "
            "created_at, "
            "updated_at, "
            "details) "
            "VALUES (%d, %s, '%s', '%s', %s)"
            % (job_row["id"], "submission_failed", jobtracker.nowstr(), jobtracker.nowstr(), errormsg)
        )
        queries.append(
            "UPDATE jobs "
            "SET status='failed', "
            "details='Error while submitting job', "
            "updated_at='%s' "
            "WHERE id=%d" % (jobtracker.nowstr(), job_row["id"])
        )
        jobtracker.query(queries)
    except queue_managers.QueueManagerNonFatalError:
        # Do nothing. Don't submit the job. Don't mark the job as 'submitted'.
        # Don't mark the job as 'failed'. The job submission will be retried.
        pass
    except queue_managers.QueueManagerFatalError:
        # A fatal error occurred. Re-raise!
        raise
    else:
        # No error occurred
        msg = "Submitted job to process:\n"
        msg += "\tJob ID: %d, Queue ID: %s\n" % (job_row["id"], queue_id)
        msg += "\tData file(s):\n"
        for fn in fns:
            msg += "\t%s\n" % fn
        jobpool_cout.outs(msg)
        queries = []
        queries.append(
            "INSERT INTO job_submits ("
            "job_id, "
            "queue_id, "
            "output_dir, "
            "status, "
            "created_at, "
            "updated_at, "
            "details) "
            "VALUES (%d,'%s','%s','%s','%s','%s','%s')"
            % (
                job_row["id"],
                queue_id,
                outdir,
                "running",
                jobtracker.nowstr(),
                jobtracker.nowstr(),
                "Job submitted to queue",
            )
        )
        queries.append(
            "UPDATE jobs "
            "SET status='submitted', "
            "details='Job submitted to queue', "
            "updated_at='%s' "
            "WHERE id=%d" % (jobtracker.nowstr(), job_row["id"])
        )
        jobtracker.query(queries)
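The if/elif ladder above that picks res can be expressed as a lookup, which also closes the gap where res stays unbound for an unrecognized task. The field names below (walltime in seconds, memory, and a third limit) are assumptions; the example does not document them:

# Resource selection as a lookup table; field meanings are assumed.
TASK_RESOURCES = [
    (lambda t: t == "rfifind",  [4 * 60 * 60, 1024, 25]),
    (lambda t: "search" in t,   [165240, 1024, 28]),  # 45.9 hrs
    (lambda t: t == "sifting",  [30 * 60, 256, 5]),   # sifting is quick
    (lambda t: "folding" in t,  [96 * 60 * 60, 3000, 28]),
]

def resources_for(task):
    for matches, res in TASK_RESOURCES:
        if matches(task):
            return res
    # The original ladder would leave res unbound here and fail later
    # with a NameError; failing loudly is clearer.
    raise ValueError("No resources defined for task %r" % task)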
Example #9
def submit(job_row):
    """
    Submits a job to the QueueManager; if successful, stores the returned queue id.

    Input:
        job_row: A row from the jobs table. The datafiles associated
            with this job will be submitted to be processed.
    Outputs:
        None
    """
    fns = pipeline_utils.get_fns_for_jobid(job_row['id']) 
    bad_days = ['20170414', '20170419', '20170420', '20170423', '20170427', '20170429', '20170503', '20170510', '20170516']
    bad_beams = ['b5', 'b6']
    for bad_day in bad_days:
        if bad_day in fns[0]:
            if (bad_beams[0] in fns[0]) or (bad_beams[1] in fns[0]):
                print "Files affected by the bad beams 5, 6 60Hz signal: ", fns
                print "Will delete the raw data files."
                queries=[]
                arglist=[]
                queries.append("INSERT INTO job_submits (" \
                                    "job_id, " \
                                    "status, " \
                                    "created_at, " \
                                    "updated_at, " \
                                    "details) " \
                                    "VALUES (?, ?, ?, ?, ?)" )
                arglist.append( ( job_row['id'], 'Beams 5 and 6', \
                                  jobtracker.nowstr(), jobtracker.nowstr(), \
                                  "Affected by 60Hz signal") )
                queries.append("UPDATE jobs " \
                               "SET status='terminal_failure', " \
                               "details='Beams 5 and 6 affected by 60Hz signal', " \
                               "updated_at=? " \
                               "WHERE id=?" )
                arglist.append( (jobtracker.nowstr(), job_row['id']) )
                jobtracker.execute(queries, arglist)
                return
    try:
        presubmission_check(fns)
        outdir = get_output_dir(fns)
        if outdir=="":
            pass
        # Attempt to submit the job
        if job_row['status'] == 'retrying':
            # ppn = 2
            ppn=1
        else: 
            ppn=1
        if config.jobpooler.alternative_submit_script:
            print "Submitting:", config.jobpooler.alternative_submit_script
            queue_id = config.jobpooler.queue_manager.submit\
                        (fns, outdir, job_row['id'],\
                         script=config.jobpooler.alternative_submit_script,\
                         ppn=ppn)
        else:
            queue_id = config.jobpooler.queue_manager.submit\
                        (fns, outdir, job_row['id'], ppn=ppn)
    except (FailedPreCheckError):
        # Error caught during presubmission check.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Job ID: %d " % job_row['id']
        errormsg += "failed presubmission check!\n\n"
        errormsg += "".join(exceptionmsgs)

        jobpool_cout.outs("Job ID: %d failed presubmission check!\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1])) 

        if config.basic.delete_rawdata:
            pipeline_utils.clean_up(job_row['id'])

        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                            "job_id, " \
                            "status, " \
                            "created_at, " \
                            "updated_at, " \
                            "details) " \
                      "VALUES (?, ?, ?, ?, ?)" )
        arglist.append( ( job_row['id'], 'precheck_failed', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        errormsg) )
        queries.append("UPDATE jobs " \
                       "SET status='terminal_failure', " \
                            "details='Failed presubmission check', " \
                            "updated_at=? " \
                       "WHERE id=?" )
        arglist.append( (jobtracker.nowstr(), job_row['id']) )
        jobtracker.execute(queries, arglist)
    except (queue_managers.QueueManagerJobFatalError,\
              datafile.DataFileError):
        # Error caught during job submission.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg  = "Error while submitting job!\n"
        errormsg += "\tJob ID: %d\n\n" % job_row['id']
        errormsg += "".join(exceptionmsgs)

        jobpool_cout.outs("Error while submitting job!\n" \
                          "\tJob ID: %d\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1])) 
        
        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                            "job_id, " \
                            "status, " \
                            "created_at, " \
                            "updated_at, " \
                            "details) " \
                      "VALUES (?, ?, ?, ?, ?)" )
        arglist.append( ( job_row['id'], 'submission_failed', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        errormsg) )
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                            "details='Error while submitting job', " \
                            "updated_at=? " \
                       "WHERE id=?" )
        arglist.append( (jobtracker.nowstr(), job_row['id']) )
        jobtracker.execute(queries, arglist)
    except queue_managers.QueueManagerNonFatalError:
        # Do nothing. Don't submit the job. Don't mark the job as 'submitted'.
        # Don't mark the job as 'failed'. The job submission will be retried.
        pass
    except queue_managers.QueueManagerFatalError:
        # A fatal error occurred. Re-raise!
        raise
    except (MissingFilesError):
        # Unexpected error
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Job ID: %d " % job_row['id']
        errormsg += "Raw data files missing from /scratch/ area.!\n\n"
        errormsg += "".join(exceptionmsgs)
        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                            "job_id, " \
                            "status, " \
                            "created_at, " \
                            "updated_at, " \
                            "details) " \
                      "VALUES (?, ?, ?, ?, ?)" )
        arglist.append( ( job_row['id'], 'submission_failed', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        errormsg) )
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                            "details='Error while submitting job', " \
                            "updated_at=? " \
                       "WHERE id=?" )
        arglist.append( (jobtracker.nowstr(), job_row['id']) )
        jobtracker.execute(queries, arglist)
        print errormsg
    else: 
        # No error occurred
        msg  = "Submitted job to process:\n" 
        msg += "\tJob ID: %d, Queue ID: %s\n" % (job_row['id'], queue_id) 
        msg += "\tData file(s):\n" 
        for fn in fns:
            msg += "\t%s\n" % fn
        jobpool_cout.outs(msg)
        queries = []
        queries.append("INSERT INTO job_submits (" \
                            "job_id, " \
                            "queue_id, " \
                            "output_dir, " \
                            "status, " \
                            "created_at, " \
                            "updated_at, " \
                            "details) " \
                      "VALUES (%d,'%s','%s','%s','%s','%s','%s')" % \
                      (job_row['id'], queue_id, outdir, 'running', \
                        jobtracker.nowstr(), jobtracker.nowstr(), \
                        'Job submitted to queue'))
        queries.append("UPDATE jobs " \
                       "SET status='submitted', " \
                            "details='Job submitted to queue', " \
                            "updated_at='%s' " \
                       "WHERE id=%d" % \
                    (jobtracker.nowstr(), job_row['id']))
        jobtracker.query(queries)
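The screening loop at the top of this submit() flags observations from bad days whose filenames also mention beam b5 or b6. The same test as a small predicate (is_bad_observation and the sample filename are made up):

def is_bad_observation(fn, bad_days, bad_beams):
    # True when the filename falls on a flagged day AND comes from one
    # of the beams affected by the 60Hz signal.
    return any(day in fn for day in bad_days) and \
           any(beam in fn for beam in bad_beams)

bad_days = ['20170414', '20170419']
bad_beams = ['b5', 'b6']
print is_bad_observation('20170414_b5_0001.fits', bad_days, bad_beams)  # True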