예제 #1
0
def main():
    """Run the downloader forever, backing off while nothing is downloaded.

    Each pass calls Downloader.status() and then Downloader.run().  When
    run() returns a true value the sleep multiplier is reset to 1;
    otherwise the multiplier is doubled, capped at 32.  The loop then
    sleeps for config.background.sleep * multiplier seconds.

    Any exception raised during a pass is reported on stderr, e-mailed
    (with traceback) when config.email.send_on_crash is set, and then
    re-raised, which ends the loop.
    """
    delay = 0.5  # First iteration will set delay=1 or multiply by 2
    while True:
        try:
            Downloader.status()
            if Downloader.run():
                # Files were successfully downloaded: go back to the
                # base polling interval.
                delay = 1
            else:
                # No files downloaded this iteration: increase the sleep
                # time, but never beyond 32x the base interval.
                delay = min(delay * 2, 32)
        except Exception as e:
            if config.email.send_on_crash:
                msg = '*** Downloader has crashed! ***\n\n'
                msg += 'Fatal error occured while running downloader: %s\n\n' % str(e)
                msg += ''.join(traceback.format_exception(*sys.exc_info()))
                notification = mailer.ErrorMailer(msg, subject="Downloader crash!")
                notification.send()
            sys.stderr.write("Fatal error occurred!\n")
            raise
        sleep_time = config.background.sleep * delay
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Will sleep for %d seconds" % sleep_time)
        # NOTE: a stray tab-indented sys.exit() used to sit here; it ended
        # the process before the sleep announced above could happen.
        time.sleep(sleep_time)
예제 #2
0
def get_output_dir(fns):
    """Given a list of data files, 'fns', create and return the path
        where results should be written.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/
        Note: 'base_results_directory' is defined in the processing config file.
                'mjd', 'obs_name', and 'beam_num' are from parsing
                the job's datafiles. 'proc_date' is the current date
                in yymmddThhmmss format.

        If that path already exists, '_1', '_2', ... is appended until a
        directory can be created, and a warning e-mail is sent.

        Input:
            fns: List of data file names. Only the first is used to
                read header information.
        Output:
            outdir: The directory that was created.
        Raises:
            pipeline_utils.PipelineError: if the data is not PSRFITS.
            OSError: if the directory cannot be created for a reason
                other than already existing.
    """
    import errno  # Local import; only needed to recognise EEXIST below.

    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)
        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date = datetime.datetime.now().strftime('%y%m%dT%H%M%S')
    baseoutdir = os.path.join(config.processing.base_results_directory,
                              str(mjd), str(obs_name),
                              str(beam_num), proc_date)

    # Claim the directory by creating it, rather than checking for
    # existence first: a separate exists()/makedirs() pair leaves a window
    # in which another process can take the same name.
    counter = 0
    outdir = baseoutdir
    while True:
        try:
            os.makedirs(outdir)
            break
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise
        # The candidate is already taken; try the next suffix.
        counter += 1
        outdir = "%s_%d" % (baseoutdir, counter)

    # Send an email if our first choice for outdir wasn't available
    if counter:
        errormsg = "The first-choice output directory '%s' " \
                    "already existed. Had to settle for '%s' " \
                    "after %d tries. \n\n " \
                    "Data files:\n " \
                    "\t%s" % (baseoutdir, outdir, counter, "\n\t".join(fns))
        notification = mailer.ErrorMailer(errormsg,
                        subject="Job outdir existance warning")
        notification.send()
    return outdir
예제 #3
0
def main():
    """Run JobUploader.run() forever, sleeping between passes.

    After each successful pass the loop sleeps for
    config.background.sleep seconds.  If a pass raises, the error is
    reported on stderr, e-mailed (with traceback) when
    config.email.send_on_crash is set, and then re-raised, which ends
    the loop.
    """
    while True:
        try:
            JobUploader.run()
        except Exception as e:
            if config.email.send_on_crash:
                msg = '*** Uploader has crashed! ***\n\n'
                msg += 'Fatal error occured while running job uploader: %s\n\n' % str(e)
                msg += ''.join(traceback.format_exception(*sys.exc_info()))
                notification = mailer.ErrorMailer(msg,
                                                  subject="Uploader crash!")
                notification.send()
            sys.stderr.write("Fatal error occurred!\n")
            # Bare raise keeps the original exception and traceback.
            raise
        time.sleep(config.background.sleep)
예제 #4
0
def main():
    """Run the job pool forever, sleeping between passes.

    Each pass calls job.status() followed by job.rotate(), then sleeps
    for config.background.sleep seconds.  If a pass raises, the error is
    reported on stderr, e-mailed (with traceback) when
    config.email.send_on_crash is set, and then re-raised, which ends
    the loop.
    """
    while True:
        # rotation function changes/updates the states and submits jobs
        # that were created
        try:
            job.status()
            job.rotate()
        except Exception as e:
            if config.email.send_on_crash:
                msg = '*** Job pooler has crashed! ***\n\n'
                msg += 'Fatal error occured while running job pool: %s\n\n' % str(e)
                msg += ''.join(traceback.format_exception(*sys.exc_info()))
                mailer.ErrorMailer(msg).send()
            sys.stderr.write("Fatal error occurred!\n")
            # Bare raise keeps the original exception and traceback.
            raise
        time.sleep(config.background.sleep)
예제 #5
0
 def download(self, ftp_file_path):
     # Keep trying to connect and log in until it succeeds.
     try_counter = 0
     login = False
     while not login:
         try_counter += 1
         try:
             myFtp = self.login(self.connect())
             login = True
         except (CornellFTPConnectionError, CornellFTPLoginError) as e:
             print(str(e))
             if try_counter > 7:
                 # After more than 7 failed attempts, also report the
                 # failure by e-mail on every further failed attempt.
                 try:
                     # Argument order must match the format string:
                     # %u takes the attempt count, %s the error text.
                     notification = mailer.ErrorMailer(
                         'CornellFTP login failure, retried %u times: %s' %
                         (try_counter, str(e)))
                     notification.send()
                 except Exception:
                     # Sending the notification is best-effort; a mail
                     # failure must not stop the login retries.
                     pass
예제 #6
0
def main():
    """Run the job pool forever, sleeping between passes.

    Each pass refreshes job statuses from the queue, moves results,
    refreshes statuses again, then calls job.status() and job.rotate(),
    and finally sleeps for config.background.sleep seconds.  If a pass
    raises, the error is reported on stderr, e-mailed (with traceback)
    when config.email.send_on_crash is set, and then re-raised, which
    ends the loop.
    """
    while True:
        # rotation function changes/updates the states and submits jobs
        # that were created
        try:
            job.update_jobs_status_from_queue()
            pipeline_utils.move_results()
            # Statuses are refreshed a second time after moving results.
            job.update_jobs_status_from_queue()
            job.status()
            job.rotate()
            #pipeline_utils.archive_logs()
        except Exception as e:
            if config.email.send_on_crash:
                msg = '*** Job pooler has crashed! ***\n\n'
                msg += 'Fatal error occured while running job pool: %s\n\n' % str(e)
                msg += ''.join(traceback.format_exception(*sys.exc_info()))
                notification = mailer.ErrorMailer(msg,
                                                  subject="Job Pooler crash!")
                notification.send()
            sys.stderr.write("Fatal error occurred!\n")
            # Bare raise keeps the original exception and traceback.
            raise

        time.sleep(config.background.sleep)
예제 #7
0
import time
import sys
import traceback

import mailer
import JobUploader
import config.background
import config.email

# Run the uploader forever, sleeping config.background.sleep seconds
# between passes.  A failing pass is reported on stderr, e-mailed (with
# traceback) when config.email.send_on_crash is set, and re-raised.
while True:
    try:
        JobUploader.run()
    except Exception as e:
        if config.email.send_on_crash:
            msg = '*** Uploader has crashed! ***\n\n'
            msg += 'Fatal error occured while running job uploader: %s\n\n' % str(e)
            msg += ''.join(traceback.format_exception(*sys.exc_info()))
            mailer.ErrorMailer(msg).send()
        sys.stderr.write("Fatal error occurred!\n")
        # Bare raise keeps the original exception and traceback.
        raise
    time.sleep(config.background.sleep)
예제 #8
0
파일: job.py 프로젝트: kstovall/pipeline2.0
def recover_failed_jobs():
    """Gather jobs with status 'failed' from the job-tracker DB.
        For each of these jobs see if it can be re-submitted.
        If it can, set the status to 'retrying'. If the
        job cannot be re-submitted, set the status to 'terminal_failure',
        and delete the raw data (if config is set for deletion).

        A job can be re-submitted while its number of job_submits rows
        is below config.jobpooler.max_attempts.

        Depending on configurations emails may be sent.

        Inputs:
            None
        Outputs:
            None
    """
    failed_jobs = jobtracker.query("SELECT * FROM jobs " \
                                   "WHERE status='failed'")

    for job in failed_jobs:
        # Count the number of times this job has been submitted already.
        # Rows are ordered newest first, so submits[0] is the latest try.
        submits = jobtracker.query("SELECT * FROM job_submits " \
                                   "WHERE job_id=%d " \
                                   "ORDER BY id DESC" % job['id'])
        if len(submits) < config.jobpooler.max_attempts:
            # We can re-submit this job.
            if config.email.send_on_failures:
                # Send error email
                msg  = "Error! Job submit status: %s\n" % \
                            submits[0]['status']
                msg += "Job ID: %d, Job submit ID: %d\n\n" % \
                        (job['id'], submits[0]['id'])
                msg += str(submits[0]['details'])
                msg += "\n*** Job will be re-submitted to the queue ***\n"
                mailer.ErrorMailer(msg).send()

            # Set status to 'retrying'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='retrying', " \
                                  "updated_at='%s', " \
                                  "details='Job will be retried' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
            jobpool_cout.outs("Job #%d will be retried." % job['id'])
        else:
            # We've run out of attempts for this job.
            # Log the outcome whether or not e-mail is enabled, as the
            # retry branch above does.
            if config.basic.delete_rawdata:
                jobpool_cout.outs("Job #%d will NOT be retried. " \
                                    "Data files will be deleted." % job['id'])
            else:
                jobpool_cout.outs("Job #%d will NOT be retried. " %
                                  job['id'])

            # 'send_on_failures' is the same option tested in the retry
            # branch; the singular spelling used here before did not
            # match it.
            if config.email.send_on_terminal_failures or \
                    config.email.send_on_failures:
                # Send error email
                msg  = "Error! Job submit status: %s\n" % \
                            str(submits[0]['status'])
                msg += "Job ID: %d, Job submit ID: %d\n\n" % \
                        (job['id'], submits[0]['id'])
                msg += str(submits[0]['details'])
                msg += "\n*** No more attempts for this job. ***\n"
                msg += "*** Job will NOT be re-submitted! ***\n"
                if config.basic.delete_rawdata:
                    msg += "*** Raw data files will be deleted. ***\n"
                mailer.ErrorMailer(msg).send()

            if config.basic.delete_rawdata:
                pipeline_utils.clean_up(job['id'])

            # Set status to 'terminal_failure'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='terminal_failure', " \
                                  "updated_at='%s', " \
                                  "details='Job has failed permanently' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
예제 #9
0
def _record_failed_submission(job_id, submit_status, job_status,
                              job_details, errormsg):
    """Insert a job_submits row for a failed attempt and update the job.

    Both statements are passed to jobtracker.execute() in one call, with
    all values bound through '?' placeholders (errormsg holds a
    traceback and may contain quotes).
    """
    queries = []
    arglist = []
    queries.append("INSERT INTO job_submits (" \
                        "job_id, " \
                        "status, " \
                        "created_at, " \
                        "updated_at, " \
                        "details) " \
                  "VALUES (?, ?, ?, ?, ?)")
    arglist.append((job_id, submit_status,
                    jobtracker.nowstr(), jobtracker.nowstr(),
                    errormsg))
    queries.append("UPDATE jobs " \
                   "SET status=?, " \
                        "details=?, " \
                        "updated_at=? " \
                   "WHERE id=?")
    arglist.append((job_status, job_details,
                    jobtracker.nowstr(), job_id))
    jobtracker.execute(queries, arglist)


def submit(job_row):
    """
    Submits a job to QueueManager, if successful will store returned queue id.

    Outcomes, by what the submission attempt raises:
        FailedPreCheckError: the attempt is recorded as 'precheck_failed'
            and the job becomes 'terminal_failure'; raw data is cleaned
            up if config.basic.delete_rawdata is set, and an e-mail is
            sent if config.email.send_on_terminal_failures is set.
        QueueManagerJobFatalError or DataFileError: the attempt is
            recorded as 'submission_failed' and the job becomes 'failed'.
        QueueManagerNonFatalError: nothing is recorded.
        QueueManagerFatalError: re-raised to the caller.
        Nothing: the attempt is recorded as 'running' and the job
            becomes 'submitted'.

    Input:
        job_row: A row from the jobs table. The datafiles associated
            with this job will be submitted to be processed.
    Outputs:
        None
    """
    fns = pipeline_utils.get_fns_for_jobid(job_row['id'])

    try:
        presubmission_check(fns)
        outdir = get_output_dir(fns)
        # Attempt to submit the job
        queue_id = config.jobpooler.queue_manager.submit(
                            fns, outdir, job_row['id'])
    except (FailedPreCheckError):
        # Error caught during presubmission check.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Job ID: %d " % job_row['id']
        errormsg += "failed presubmission check!\n\n"
        errormsg += "".join(exceptionmsgs)

        jobpool_cout.outs("Job ID: %d failed presubmission check!\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1]))

        # Log the outcome whether or not e-mail is enabled.
        if config.basic.delete_rawdata:
            jobpool_cout.outs("Job #%d will NOT be retried. " \
                                "Data files will be deleted." % job_row['id'])
        else:
            jobpool_cout.outs("Job #%d will NOT be retried. " %
                              job_row['id'])

        if config.email.send_on_terminal_failures:
            # Send error email
            msg = "Presubmission check failed!\n"
            msg += "Job ID: %d\n\n" % \
                    (job_row['id'])
            msg += errormsg
            msg += "\n*** Job has been terminally failed. ***\n"
            msg += "*** Job will NOT be re-submitted! ***\n"
            if config.basic.delete_rawdata:
                msg += "*** Raw data files will be deleted. ***\n"
            notification = mailer.ErrorMailer(msg, \
                            subject="Job failed presubmission check - Terminal")
            notification.send()

        if config.basic.delete_rawdata:
            pipeline_utils.clean_up(job_row['id'])

        _record_failed_submission(job_row['id'], 'precheck_failed',
                                  'terminal_failure',
                                  'Failed presubmission check', errormsg)
    except (queue_managers.QueueManagerJobFatalError,\
              datafile.DataFileError):
        # Error caught during job submission.
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while submitting job!\n"
        errormsg += "\tJob ID: %d\n\n" % job_row['id']
        errormsg += "".join(exceptionmsgs)

        jobpool_cout.outs("Error while submitting job!\n" \
                          "\tJob ID: %d\n\t%s\n" % \
                          (job_row['id'], exceptionmsgs[-1]))

        _record_failed_submission(job_row['id'], 'submission_failed',
                                  'failed',
                                  'Error while submitting job', errormsg)
    except queue_managers.QueueManagerNonFatalError:
        # Do nothing. Don't submit the job. Don't mark the job as 'submitted'.
        # Don't mark the job as 'failed'. The job submission will be retried.
        pass
    except queue_managers.QueueManagerFatalError:
        # A fatal error occurred. Re-raise!
        raise
    else:
        # No error occurred
        msg = "Submitted job to process:\n"
        msg += "\tJob ID: %d, Queue ID: %s\n" % (job_row['id'], queue_id)
        msg += "\tData file(s):\n"
        for fn in fns:
            msg += "\t%s\n" % fn
        jobpool_cout.outs(msg)

        # Bind values through placeholders, as the failure paths do, so
        # that a quote in the output path or queue id cannot break the
        # statement.
        queries = []
        arglist = []
        queries.append("INSERT INTO job_submits (" \
                            "job_id, " \
                            "queue_id, " \
                            "output_dir, " \
                            "status, " \
                            "created_at, " \
                            "updated_at, " \
                            "details) " \
                      "VALUES (?, ?, ?, ?, ?, ?, ?)")
        # str(queue_id) keeps the stored value textual, as the previous
        # '%s' interpolation did.
        arglist.append((job_row['id'], str(queue_id), outdir, 'running',
                        jobtracker.nowstr(), jobtracker.nowstr(),
                        'Job submitted to queue'))
        queries.append("UPDATE jobs " \
                       "SET status='submitted', " \
                            "details='Job submitted to queue', " \
                            "updated_at=? " \
                       "WHERE id=?")
        arglist.append((jobtracker.nowstr(), job_row['id']))
        jobtracker.execute(queries, arglist)