Example #1
def make_request():
    """Make a request for data to be restored by connecting to the
        web services at Cornell.
    """
    dlm_cout.outs("Requesting data")
    num_beams = 1
    web_service = CornellWebservice.Client()
    guid = web_service.Restore(username=config.download.api_username, \
                               pw=config.download.api_password, \
                               pipeline=config.basic.pipeline.lower(), \
                               number=num_beams, \
                               bits=config.download.request_numbits, \
                               fileType=config.download.request_datatype)
    if guid == "fail":
        raise pipeline_utils.PipelineError(
            "Request for restore returned 'fail'.")

    requests = jobtracker.query("SELECT * FROM requests " \
                             "WHERE guid='%s'" % guid)
    if requests:
        # Entries in the requests table exist with this GUID!?
        raise pipeline_utils.PipelineError("There are %d requests in the " \
                                           "job-tracker DB with this GUID %s" % \
                                           (len(requests), guid))

    jobtracker.query("INSERT INTO requests ( " \
                        "guid, " \
                        "created_at, " \
                        "updated_at, " \
                        "status, " \
                        "details) " \
                     "VALUES ('%s', '%s', '%s', '%s', '%s')" % \
                     (guid, jobtracker.nowstr(), jobtracker.nowstr(), 'waiting', \
                        'Newly created request'))
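
The INSERT above interpolates the values straight into the SQL string passed to jobtracker.query(). As a rough standalone sketch (not the pipeline's jobtracker API), the same row could be written with parameterized placeholders using the standard-library sqlite3 module; the database file, GUID, and timestamp below are hypothetical:

import sqlite3

# Hypothetical standalone illustration of the insert performed above.
conn = sqlite3.connect("jobtracker.db")           # hypothetical DB file
guid = "0f8b2c1d-example-guid"                    # hypothetical GUID from the web service
now = "2024-01-15 12:00:00"                       # stands in for jobtracker.nowstr()
conn.execute("INSERT INTO requests (guid, created_at, updated_at, status, details) "
             "VALUES (?, ?, ?, ?, ?)",
             (guid, now, now, 'waiting', 'Newly created request'))
conn.commit()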
Example #2
def get_output_dir(fns):
    """Given a list of data files, 'fns', generate path to output results.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/
        Note: 'base_results_directory' is defined in the config file.
                'mjd', 'obs_name', and 'beam_num' are from parsing
                the job's datafiles. 'proc_date' is the current date
                in YYMMDD format.
    """
    # Check that files exist
    missingfiles = [fn for fn in fns if not os.path.exists(fn)]
    if missingfiles:
        errormsg = "The following files cannot be found:\n"
        for missing in missingfiles:
            errormsg += "\t%s\n" % missing
        raise pipeline_utils.PipelineError(errormsg)

    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)
        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date = datetime.datetime.now().strftime('%y%m%d')
    outdir = os.path.join(config.jobpooler.base_results_directory, \
                                    str(mjd), str(obs_name), \
                                    str(beam_num), proc_date)
    return outdir
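
For illustration, a hypothetical call and the kind of path it yields; the file name, config value, MJD, and date below are made up:

# Assuming config.jobpooler.base_results_directory = "/results" (hypothetical)
fns = ["/data/puppi_60324_G45.00+00.00_0001.fits"]    # hypothetical PSRFITS file
outdir = get_output_dir(fns)
# outdir would look like: /results/60324/G45.00+00.00/0/240115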
Example #3
    def submit(self, datafiles, outdir):
        """Submits a job to the queue to be processed.
            Returns a unique identifier for the job.

            Inputs:
                datafiles: A list of the datafiles being processed.
                outdir: The directory where results will be copied to.

            Output:
                jobid: A unique job identifier.
        
            *** NOTE: A pipeline_utils.PipelineError is raised if
                        the queue submission fails.
        """
        searchscript = os.path.join(config.basic.pipelinedir, 'bin',
                                    'search.py')
        cmd = 'qsub -M %s -V -v DATAFILES="%s",OUTDIR="%s" -l %s -N %s -e %s -o /dev/null %s' % \
                        (config.email.recipient, ';'.join(datafiles), outdir, self.resource_list, \
                            self.job_basename, self.qsublogdir, searchscript)
        pipe = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, \
                                stdin=subprocess.PIPE)
        queue_id = pipe.communicate()[0].strip()
        pipe.stdin.close()
        if not queue_id:
            errormsg = "No job identifier returned by qsub!\n"
            errormsg += "\tCommand executed: %s\n" % cmd
            raise pipeline_utils.PipelineError(errormsg)
        else:
            # There is occasionally a short delay between submission and
            # the job appearing on the queue, so sleep for 1 second.
            time.sleep(1)
        return queue_id
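
A hypothetical usage sketch; 'queue' stands for an instance of the class this method belongs to, and the file list and output directory are made up:

datafiles = ["/data/beam0.fits", "/data/beam1.fits"]    # hypothetical
outdir = "/results/60324/G45.00+00.00/0/240115"         # hypothetical
jobid = queue.submit(datafiles, outdir)                 # 'queue' is hypothetical
print("Submitted queue job %s" % jobid)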
Example #4
    def delete(self, queue_id):
        """Remove the job identified by 'queue_id' from the queue.

        Input:
            queue_id: Unique identifier for a job.
        
        Output:
            None
            
            *** NOTE: A pipeline_utils.PipelineError is raised if
                        the job removal fails.
        """
        cmd = "canceljob %s" % queue_id
        pipe = subprocess.Popen(cmd, shell=True)

        # Wait a few seconds and see if the job is still being tracked by
        # the queue manager, or if it is marked as exiting.
        time.sleep(5)

        # Force a queue update
        queue, comm_err = self._showq(update_time=0)

        state = self._check_job_state(queue_id)
        if ('Completed' not in state) and ('Canceling' not in state) and \
                ('DNE' not in state):
            errormsg = "The job (%s) is still in the queue " % queue_id
            errormsg += "and is marked as state '%s'!\n" % state
            raise pipeline_utils.PipelineError(errormsg)
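
A hypothetical usage sketch showing how a caller might surface a failed removal (written in the snippets' Python 2 style):

try:
    queue.delete(jobid)    # 'queue' and 'jobid' are hypothetical, as in the submit sketch above
except pipeline_utils.PipelineError, e:
    print("Job removal failed: %s" % str(e))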
Example #5
def get_output_dir(fns):
    """Given a list of data files, 'fns', generate path to output results.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/
        Note: 'base_results_directory' is defined in the processing config file.
                'mjd', 'obs_name', and 'beam_num' are from parsing
                the job's datafiles. 'proc_date' is the current date
                in yymmddThhmmss format.
    """

    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)

        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date = datetime.datetime.now().strftime('%y%m%dT%H%M%S')
    baseoutdir = os.path.join(config.processing.base_results_directory, \
                                    str(mjd), str(obs_name), \
                                    str(beam_num), proc_date)
    outdir = baseoutdir

    # Make sure our output directory doesn't already exist
    counter = 0
    while os.path.exists(outdir):
        counter += 1
        outdir = "%s_%d" % (baseoutdir, counter)

    # Make the directory immediately so the pipeline knows it's taken
    try:
        os.makedirs(outdir)
        os.chmod(outdir, 0777)
        created = True
        return outdir, created
    except OSError:
        current_usr = getpass.getuser()
        errormsg = "Permission denied to %s when attempting to create output directory %s" % (
            current_usr, outdir)
        f = open(
            '/scratch/eparent/eparent/PALFA4/results/missing_permission.out',
            'a')
        f.write(errormsg + '\n')
        f.close()
        print errormsg
        created = False
        return outdir, created
Example #6
    def submit(self, datafiles, outdir, \
                script=os.path.join(config.basic.pipelinedir, 'bin', 'search.py')):
        """Submits a job to the queue to be processed.
            Returns a unique identifier for the job.

            Inputs:
                datafiles: A list of the datafiles being processed.
                outdir: The directory where results will be copied to.
                script: The script to submit to the queue. (Default:
                        '{config.basic.pipelinedir}/bin/search.py')

            Output:
                jobid: A unique job identifier.
        
            *** NOTE: A pipeline_utils.PipelineError is raised if
                        the queue submission fails.
        """
        node = self._get_submit_node()
        if node is None:
            errormsg = "No nodes to accept job submission!\n"
            raise pipeline_utils.PipelineError(errormsg)
        errorlog = os.path.join(config.basic.qsublog_dir, "'$PBS_JOBID'.ER")
        stdoutlog = os.devnull
        cmd = "qsub -V -v DATAFILES='%s',OUTDIR='%s' -l nodes=%s:ppn=1 -N %s -e %s -o %s %s" % \
                        (';'.join(datafiles), outdir, node, \
                            self.job_basename, errorlog, stdoutlog, script)
        pipe = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, \
                                stdin=subprocess.PIPE)
        queue_id = pipe.communicate()[0].strip()
        pipe.stdin.close()
        if not queue_id:
            errormsg  = "No job identifier returned by qsub!\n"
            errormsg += "\tCommand executed: %s\n" % cmd
            raise pipeline_utils.PipelineError(errormsg)
        else:
            # There is occasionally a short delay between submission and 
            # the job appearing on the queue, so sleep for 1 second. 
            time.sleep(1)
        return queue_id
Example #7
def get_output_dir(fns):
    """Given a list of data files, 'fns', generate path to output results.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/
        Note: 'base_results_directory' is defined in the processing config file.
                'mjd', 'obs_name', and 'beam_num' are from parsing
                the job's datafiles. 'proc_date' is the current date
                in yymmddThhmmss format.
    """

    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)
        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date = datetime.datetime.now().strftime('%y%m%dT%H%M%S')
    baseoutdir = os.path.join(config.processing.base_results_directory, \
                                    str(mjd), str(obs_name), \
                                    str(beam_num), proc_date)
    outdir = baseoutdir

    # Make sure our output directory doesn't already exist
    counter = 0
    while os.path.exists(outdir):
        counter += 1
        outdir = "%s_%d" % (baseoutdir, counter)

    # Make the directory immediately so the pipeline knows it's taken
    os.makedirs(outdir)

    # Send an email if our first choice for outdir wasn't available
    if counter:
        errormsg = "The first-choice output directory '%s' " \
                    "already existed. Had to settle for '%s' " \
                    "after %d tries. \n\n " \
                    "Data files:\n " \
                    "\t%s" % (baseoutdir, outdir, counter, "\n\t".join(fns))
        notification = mailer.ErrorMailer(errormsg, \
                        subject="Job outdir existance warning")
        notification.send()
    return outdir
Example #8
def make_request():
    """Make a request for data to be restored by connecting to the
        web services at Cornell.
    """
    dlm_cout.outs("Requesting data")
    num_beams = 1
    web_service = suds.client.Client(config.download.api_service_url).service
    try:
        guid = web_service.Restore(username=config.download.api_username, \
                                   pw=config.download.api_password, \
                                   number=num_beams, \
                                   bits=config.download.request_numbits, \
                                   fileType=config.download.request_datatype)
    except urllib2.URLError, e:
        raise pipeline_utils.PipelineError("urllib2.URLError caught when " \
                                           "making a request for restore: %s" % \
                                           str(e))
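
For reference, a minimal sketch of building the suds client by hand; the WSDL URL, credentials, and request parameters are hypothetical, and the Restore arguments mirror the call above:

import suds.client

client = suds.client.Client("http://example.org/RestoreService?wsdl")    # hypothetical URL
guid = client.service.Restore(username="palfa_user", pw="secret",        # hypothetical credentials
                              number=1, bits=4, fileType="wapp_4bit")    # hypothetical parameters
print(guid)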
Example #9
def presubmission_check(fns):
    """Check to see if datafiles meet the critera for submission.
    """
    # Check that files exist
    missingfiles = [fn for fn in fns if not os.path.exists(fn)]
    if missingfiles:
        errormsg = "The following files cannot be found:\n"
        for missing in missingfiles:
            errormsg += "\t%s\n" % missing
        # If files are missing, we want to crash.
        raise pipeline_utils.PipelineError(errormsg)
    try:
        # For WAPP, check if the coords are in the table
        data = datafile.autogen_dataobj([fns[0]])
        if not isinstance(data, datafile.PsrfitsData):
            errormsg = "Data must be of PSRFITS format.\n"
            errormsg += "\tData type: %s\n" % type(data)
            raise FailedPreCheckError(errormsg)
    except datafile.DataFileError, e:
        raise FailedPreCheckError(e)
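
A hypothetical usage sketch; per the code above, missing files raise a fatal PipelineError while format problems raise FailedPreCheckError:

fns = ["/data/beam0.fits"]    # hypothetical file list
try:
    presubmission_check(fns)
except FailedPreCheckError, e:
    print("Datafiles failed the pre-submission check: %s" % str(e))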
Example #10
    def delete(self, queue_id):
        """Remove the job identified by 'queue_id' from the queue.

        Input:
            queue_id: Unique identifier for a job.
        
        Output:
            None
            
            *** NOTE: A pipeline_utils.PipelineError is raised if
                        the job removal fails.
        """
        cmd = "qsig -s SIGINT %s" % queue_id
        pipe = subprocess.Popen(cmd, shell=True)

        # Wait a few seconds and see if the job is still being tracked by
        # the queue manager, or if it is marked as exiting.
        time.sleep(5)
        batch = PBSQuery.PBSQuery().getjobs()
        if (queue_id in batch) and ('E' not in batch[queue_id]['job_state']):
            errormsg = "The job (%s) is still in the queue " % queue_id
            errormsg += "and is not marked as exiting (status = 'E')!\n"
            raise pipeline_utils.PipelineError(errormsg)
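
For illustration, a small sketch of inspecting a single job's state with PBSQuery, mirroring the check above (the job identifier is hypothetical):

import PBSQuery

jobid = "12345.pbs-server"    # hypothetical queue identifier
batch = PBSQuery.PBSQuery().getjobs()
if jobid in batch:
    print("Job state: %s" % batch[jobid]['job_state'])
else:
    print("Job %s is no longer tracked by the queue" % jobid)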
Example #11
    """
    dlm_cout.outs("Requesting data")
    num_beams = 1
    web_service = suds.client.Client(config.download.api_service_url).service
    try:
        guid = web_service.Restore(username=config.download.api_username, \
                                   pw=config.download.api_password, \
                                   number=num_beams, \
                                   bits=config.download.request_numbits, \
                                   fileType=config.download.request_datatype)
    except urllib2.URLError, e:
        raise pipeline_utils.PipelineError("urllib2.URLError caught when " \
                                           "making a request for restore: %s" % \
                                           str(e))
    if guid == "fail":
        raise pipeline_utils.PipelineError(
            "Request for restore returned 'fail'.")

    requests = jobtracker.query("SELECT * FROM requests " \
                             "WHERE guid='%s'" % guid)
    if requests:
        # Entries in the requests table exist with this GUID!?
        raise pipeline_utils.PipelineError("There are %d requests in the " \
                                           "job-tracker DB with this GUID %s" % \
                                           (len(requests), guid))

    jobtracker.query("INSERT INTO requests ( " \
                        "guid, " \
                        "created_at, " \
                        "updated_at, " \
                        "status, " \
                        "details) " \
Example #12
                break

        # remove temporary dir for PFDs
        shutil.rmtree(tempdir)
        # remove temporary dir for SPDs
        shutil.rmtree(tempdir_sp)

        if attempts >= 5:
            errmsg = "FTP upload failed after %d connection failures!\n" % attempts
            sys.stderr.write(errmsg)
            sys.stderr.write(
                "\tRolling back DB transaction and raising error.\n")

            # Rolling back changes (just last uncommited FTP).
            db.rollback()
            raise pipeline_utils.PipelineError(errmsg)

        else:
            # Update database statuses
            queries = []
            queries.append("UPDATE job_submits " \
             "SET status='uploaded', " \
           "details='Upload successful (header_id=%d)', " \
           "updated_at='%s' " \
             "WHERE id=%d" %
             (header_id, jobtracker.nowstr(), job_submit['id']))
            queries.append("UPDATE jobs " \
             "SET status='uploaded', " \
           "details='Upload successful (header_id=%d)', " \
           "updated_at='%s' " \
             "WHERE id=%d" % \