def get_output_dir(fns):
    """Given a list of data files, 'fns', generate path to output results.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/

        Note: 'base_results_directory' is defined in the config file.
              'mjd', 'obs_name', and 'beam_num' are from parsing
              the job's datafiles. 'proc_date' is the current date
              in YYMMDD format.
    """
    # Check that files exist
    missingfiles = [fn for fn in fns if not os.path.exists(fn)]
    if missingfiles:
        errormsg = "The following files cannot be found:\n"
        for missing in missingfiles:
            errormsg += "\t%s\n" % missing
        raise pipeline_utils.PipelineError(errormsg)

    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)
        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date = datetime.datetime.now().strftime('%y%m%d')
    outdir = os.path.join(config.jobpooler.base_results_directory,
                          str(mjd), str(obs_name),
                          str(beam_num), proc_date)
    return outdir
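# Usage sketch (illustrative only): get_output_dir() is called with a job's
# list of datafiles. The filename below is hypothetical and must exist on
# disk for the file check to pass; this assumes the pipeline's 'config',
# 'datafile', and 'pipeline_utils' modules are importable.
if __name__ == '__main__':
    fns = ['/data/example/puppi_56000_p2030_0001.fits']   # hypothetical file
    print get_output_dir(fns)
    # e.g. <base_results_directory>/56000/<obs_name>/0/130215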
def __init__(self, filenms, resultsdir):
    # Where to dump all the results
    self.outputdir = resultsdir

    self.filenms = filenms
    self.filenmstr = ' '.join(self.filenms)
    # Use splitext rather than rstrip(".fits"): rstrip strips any trailing
    # characters in the set {'.', 'f', 'i', 't', 's'}, not the literal suffix.
    self.basefilenm = os.path.splitext(os.path.split(filenms[0])[1])[0]

    # Read info from PSRFITS file
    data = datafile.autogen_dataobj(self.filenms)

    # Correct positions in data file headers for WappPsrfitsData
    if isinstance(data, datafile.WappPsrfitsData):
        data.update_positions()

    spec_info = data.specinfo
    self.backend = spec_info.backend
    self.MJD = spec_info.start_MJD[0]
    self.ra_string = spec_info.ra_str
    self.dec_string = spec_info.dec_str
    self.orig_N = spec_info.N
    self.dt = spec_info.dt  # in sec
    self.BW = spec_info.BW
    self.orig_T = spec_info.T
    # Downsampling is catered to the number of samples per row.
    # self.N = psr_utils.choose_N(self.orig_N)
    self.N = self.orig_N
    self.T = self.N * self.dt
    self.nchan = spec_info.num_channels
    self.samp_per_row = spec_info.spectra_per_subint
    self.fctr = spec_info.fctr
    self.numrows = np.sum(spec_info.num_subint)

    # Determine the average barycentric velocity of the observation
    self.baryv = get_baryv(self.ra_string, self.dec_string,
                           self.MJD, self.T, obs="AO")

    # Figure out which host we are processing on
    self.hostname = socket.gethostname()

    # The fraction of the data recommended to be masked by rfifind
    self.masked_fraction = 0.0

    # The number of candidates folded
    self.num_cands_folded = 0

    # Initialize our timers
    self.rfifind_time = 0.0
    self.downsample_time = 0.0
    self.subbanding_time = 0.0
    self.dedispersing_time = 0.0
    self.FFT_time = 0.0
    self.lo_accelsearch_time = 0.0
    self.hi_accelsearch_time = 0.0
    self.singlepulse_time = 0.0
    self.sifting_time = 0.0
    self.folding_time = 0.0
    self.total_time = 0.0

    # Initialize some candidate counters
    self.num_sifted_cands = 0
    self.num_folded_cands = 0
    self.num_single_cands = 0

    # Set dedispersion plan
    self.set_DDplan()
def upload_results(dir):
    try:
        db = database.Database('default', autocommit=False)

        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty' % dir
            raise upload.UploadNonFatalError(errormsg)

        fitsfiles = glob.glob(os.path.join(dir, "*.fits"))
        data = datafile.autogen_dataobj(fitsfiles)
        version_number = JobUploader.get_version_number(dir)

        hdr = header.get_header(fitsfiles)
        print "\tHeader parsed."

        cands, tempdir = candidates.get_candidates(version_number, dir)
        print "\tPeriodicity candidates parsed."
        sp_cands = sp_candidates.get_spcandidates(version_number, dir)
        print "\tSingle pulse candidates parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)
        diags = diagnostics.get_diagnostics(data.obs_name, data.beam_id,
                                            data.obstype,
                                            version_number,
                                            dir)
        print "\tDiagnostics parsed."

        header_id = hdr.upload(db)
        for d in diags:
            d.upload(db)
        print "\tEverything uploaded and checked successfully. header_id=%d" % \
            header_id
    except upload.UploadNonFatalError:
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tResults Dir: %s\n\n" % dir
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        # Rolling back changes.
        db.rollback()
    except (database.DatabaseConnectionError, CornellFTP.CornellFTPTimeout,
            upload.UploadDeadlockError, database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write("\tRolling back DB transaction and will re-try later.\n")
        # Rolling back changes.
        db.rollback()
def __init__(self, datafns, *args, **kwargs):
    if isinstance(datafns, datafile.Data):
        self.data = datafns
    else:
        self.data = datafile.autogen_dataobj(datafns, *args, **kwargs)

    # List of dependents (i.e. other uploadables that require
    # the header_id from this header)
    self.dependents = []
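# The header object acts as the root of an upload tree: candidates are
# attached with add_dependent() (see the upload_results() functions above)
# and uploaded after the header itself. A minimal sketch of that pattern;
# the class below and its upload_self() helper are assumptions for
# illustration, not the pipeline's actual implementation:
class UploadableSketch(object):
    def __init__(self):
        self.dependents = []

    def add_dependent(self, dep):
        self.dependents.append(dep)

    def upload_self(self, db):
        # Hypothetical: subclasses would INSERT their own row and
        # return the new row's id.
        raise NotImplementedError

    def upload(self, db):
        my_id = self.upload_self(db)
        for dep in self.dependents:
            dep.parent_id = my_id   # hand the header_id down to dependents
            dep.upload(db)
        return my_id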
def get_output_dir(fns):
    """Given a list of data files, 'fns', generate path to output results.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/

        Note: 'base_results_directory' is defined in the processing config file.
              'mjd', 'obs_name', and 'beam_num' are from parsing
              the job's datafiles. 'proc_date' is the current date
              in yymmddThhmmss format.
    """
    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)
        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date = datetime.datetime.now().strftime('%y%m%dT%H%M%S')
    baseoutdir = os.path.join(config.processing.base_results_directory,
                              str(mjd), str(obs_name),
                              str(beam_num), proc_date)
    outdir = baseoutdir

    # Make sure our output directory doesn't already exist
    counter = 0
    while os.path.exists(outdir):
        counter += 1
        outdir = "%s_%d" % (baseoutdir, counter)

    # Make the directory immediately so the pipeline knows it's taken
    try:
        os.makedirs(outdir)
        os.chmod(outdir, 0777)
        created = True
        return outdir, created
    except (OSError, IOError):
        # Narrowed from a bare 'except:' so unrelated errors
        # (e.g. KeyboardInterrupt) are not silently swallowed.
        current_usr = getpass.getuser()
        errormsg = "Permission denied to %s when attempting to create output directory %s" % \
            (current_usr, outdir)
        f = open('/scratch/eparent/eparent/PALFA4/results/missing_permission.out', 'a')
        f.write(errormsg + '\n')
        f.close()
        print errormsg
        created = False
        return outdir, created
def get_output_dir(fns):
    """Given a list of data files, 'fns', generate path to output results.

        path is:
            {base_results_directory}/{mjd}/{obs_name}/{beam_num}/{proc_date}/

        Note: 'base_results_directory' is defined in the processing config file.
              'mjd', 'obs_name', and 'beam_num' are from parsing
              the job's datafiles. 'proc_date' is the current date
              in yymmddThhmmss format.
    """
    # Get info from datafile headers
    data = datafile.autogen_dataobj([fns[0]])
    if not isinstance(data, datafile.PsrfitsData):
        errormsg = "Data must be of PSRFITS format.\n"
        errormsg += "\tData type: %s\n" % type(data)
        raise pipeline_utils.PipelineError(errormsg)

    # Generate output directory
    mjd = int(data.timestamp_mjd)
    beam_num = data.beam_id
    obs_name = data.obs_name
    proc_date = datetime.datetime.now().strftime('%y%m%dT%H%M%S')
    baseoutdir = os.path.join(config.processing.base_results_directory,
                              str(mjd), str(obs_name),
                              str(beam_num), proc_date)
    outdir = baseoutdir

    # Make sure our output directory doesn't already exist
    counter = 0
    while os.path.exists(outdir):
        counter += 1
        outdir = "%s_%d" % (baseoutdir, counter)

    # Make the directory immediately so the pipeline knows it's taken
    os.makedirs(outdir)

    # Send an email if our first choice for outdir wasn't available
    if counter:
        errormsg = "The first-choice output directory '%s' " \
                   "already existed. Had to settle for '%s' " \
                   "after %d tries.\n\n" \
                   "Data files:\n" \
                   "\t%s" % (baseoutdir, outdir, counter, "\n\t".join(fns))
        notification = mailer.ErrorMailer(errormsg,
                                          subject="Job outdir existence warning")
        notification.send()
    return outdir
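# Note on the collision loop above: checking os.path.exists() and then
# calling os.makedirs() is not atomic, so two pipeline instances can still
# pick the same directory between the check and the create. A sketch of a
# race-free variant (an alternative for illustration, not the pipeline's
# actual behaviour) that relies on makedirs() failing atomically when the
# directory already exists:
import errno
import os

def makedirs_unique(baseoutdir):
    """Create baseoutdir, or baseoutdir_N for the first free N, atomically."""
    counter = 0
    outdir = baseoutdir
    while True:
        try:
            os.makedirs(outdir)   # fails with EEXIST if somebody beat us to it
            return outdir
        except OSError, e:
            if e.errno != errno.EEXIST:
                raise             # a real error (permissions, bad path, ...)
            counter += 1
            outdir = "%s_%d" % (baseoutdir, counter)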
def copy_zaplist(fns, workdir):
    # Copy zaplist to working directory
    data = datafile.autogen_dataobj(fns)

    # First, try to find custom zaplist for this MJD
    customzapfn = os.path.join(config.processing.zaplistdir,
                               "autozap_mjd%d.zaplist" % int(data.timestamp_mjd))
    if os.path.exists(customzapfn):
        # Copy custom zaplist to workdir and rename to the expected zaplist fn
        shutil.copy(customzapfn, workdir)
        print "Copied custom zaplist: %s" % customzapfn
    else:
        # Copy default zaplist
        shutil.copy(config.processing.default_zaplist, workdir)
        print "No custom zaplist found. Copied default zaplist: %s" % \
            config.processing.default_zaplist
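# Usage sketch (paths are hypothetical): for an observation at MJD 56000 this
# copies 'autozap_mjd56000.zaplist' from config.processing.zaplistdir into the
# working directory if it exists, and the configured default zaplist otherwise.
#
#   copy_zaplist(['/data/example/puppi_56000_p2030_0001.fits'], '/tmp/workdir')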
def presubmission_check(fns):
    """Check to see if datafiles meet the criteria for submission.
    """
    # Check that files exist
    missingfiles = [fn for fn in fns if not os.path.exists(fn)]
    if missingfiles:
        errormsg = "The following files cannot be found:\n"
        for missing in missingfiles:
            errormsg += "\t%s\n" % missing
        # If files are missing we want to crash
        raise pipeline_utils.PipelineError(errormsg)
    try:
        # For WAPP, check if coords are in table
        data = datafile.autogen_dataobj([fns[0]])
        if not isinstance(data, datafile.PsrfitsData):
            errormsg = "Data must be of PSRFITS format.\n"
            errormsg += "\tData type: %s\n" % type(data)
            raise FailedPreCheckError(errormsg)
    except datafile.DataFileError, e:
        raise FailedPreCheckError(e)
def presubmission_check(fns):
    """Check to see if datafiles meet the criteria for submission.
    """
    # Check that files exist
    missingfiles = [fn for fn in fns if not os.path.exists(fn)]
    if missingfiles:
        errormsg = "The following files cannot be found:\n"
        # Expand '~' explicitly; open() does not do tilde expansion.
        with open(os.path.expanduser('~/missing_files.txt'), 'w') as f:
            for missing in missingfiles:
                f.write(missing + '\n')
                errormsg += "\t%s\n" % missing
        # If files are missing we want to crash
        raise MissingFilesError(errormsg)
    try:
        # For WAPP, check if coords are in table
        data = datafile.autogen_dataobj([fns[0]])
        print isinstance(data, datafile.PsrfitsData)
        if not isinstance(data, datafile.PsrfitsData):
            errormsg = "Data must be of PSRFITS format.\n"
            errormsg += "\tData type: %s\n" % type(data)
            raise FailedPreCheckError(errormsg)
    except (datafile.DataFileError, ValueError), e:
        raise FailedPreCheckError(e)
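# Usage sketch: presubmission_check() returns nothing on success and raises
# on failure, so callers gate submission in a try/except. The filename below
# is hypothetical:
if __name__ == '__main__':
    try:
        presubmission_check(['/data/example/puppi_56000_p2030_0001.fits'])
    except (MissingFilesError, FailedPreCheckError), e:
        print "Pre-submission check failed: %s" % str(e)
    else:
        print "Datafiles OK to submit."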
def upload_results(job_submit):
    """
    Uploads results for a given submit.

        Input:
            job_submit: A row from the job_submits table.
                Results from this job submission will be uploaded.

        Output:
            None
    """
    print "Attempting to upload results"
    print "\tJob ID: %d, Job submission ID: %d" % \
        (job_submit['job_id'], job_submit['id'])
    try:
        db = database.Database('common-copy', autocommit=False)

        # Prepare for upload
        dir = job_submit['output_dir']
        fitsfiles = get_fitsfiles(job_submit)

        # Upload results. Note: upload_header() is called once and its
        # return value checked; calling it a second time just to test the
        # result would upload the header twice.
        header_id = header.upload_header(fitsfiles, dbname=db)
        if not header_id:
            raise AssertionError("Header values in common DB " \
                                 "do not match values uploaded.")
        else:
            print "\tHeader uploaded and checked. Header ID: %d" % header_id

        version_number = get_version_number(dir)
        candidate_uploader.upload_candidates(header_id,
                                             version_number,
                                             dir, dbname=db)
        if not candidate_uploader.check_candidates(header_id,
                                                   version_number,
                                                   dir, dbname=db):
            raise AssertionError("Candidate values in common DB " \
                                 "do not match values uploaded.")
        else:
            print "\tCandidates uploaded and checked."

        data = datafile.autogen_dataobj(fitsfiles)
        diagnostic_uploader.upload_diagnostics(data.obs_name, data.beam_id,
                                               data.obstype,
                                               version_number,
                                               dir, dbname=db)
        if not diagnostic_uploader.check_diagnostics(data.obs_name, data.beam_id,
                                                     data.obstype,
                                                     version_number,
                                                     dir, dbname=db):
            raise AssertionError("Diagnostic values in common DB " \
                                 "do not match values uploaded.")
        else:
            print "\tDiagnostics uploaded and checked."
    except (header.HeaderError,
            candidate_uploader.PeriodicityCandidateError,
            diagnostic_uploader.DiagnosticError):
        # Parsing error caught. Job attempt has failed!
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tJob ID: %d, Job submit ID: %d\n\n" % \
            (job_submit['job_id'], job_submit['id'])
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        queries = []
        arglists = []
        queries.append("UPDATE job_submits " \
                       "SET status='upload_failed', " \
                       "details=?, " \
                       "updated_at=? " \
                       "WHERE id=?")
        arglists.append((errormsg, jobtracker.nowstr(), job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                       "details='Error while uploading results', " \
                       "updated_at=? " \
                       "WHERE id=?")
        arglists.append((jobtracker.nowstr(), job_submit['job_id']))
        jobtracker.execute(queries, arglists)

        # Rolling back changes.
        db.rollback()
    except database.DatabaseConnectionError, e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write("\tRolling back DB transaction and will re-try later.\n")
        # Rolling back changes.
        db.rollback()
def upload_results(job_submit):
    """
    Uploads results for a given submit.

        Input:
            job_submit: A row from the job_submits table.
                Results from this job submission will be uploaded.

        Output:
            None
    """
    print "Attempting to upload results"
    print "\tJob ID: %d, Job submission ID: %d" % \
        (job_submit['job_id'], job_submit['id'])

    if debug.UPLOAD:
        upload.upload_timing_summary = {}
        starttime = time.time()
    try:
        # Connect to the DB
        db = database.Database('SPAN512', autocommit=False)

        # Prepare for upload
        dir = job_submit['output_dir']
        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty for job_id=%d' % \
                (dir, job_submit['job_id'])
            raise upload.UploadNonFatalError(errormsg)

        fitsfiles = get_fitsfiles(job_submit)
        data = datafile.autogen_dataobj(fitsfiles)
        version_number = get_version_number(dir)

        if debug.UPLOAD:
            parsetime = time.time()

        # Upload results
        hdr = header.get_header(fitsfiles)
        print "\tHeader parsed."

        rat_inst_id_cache = ratings2.utils.RatingInstanceIDCache(dbname='nancay')
        cands, tempdir = candidates.get_candidates(version_number, dir,
                                                   timestamp_mjd=data.timestamp_mjd,
                                                   inst_cache=rat_inst_id_cache)
        print "\tPeriodicity candidates parsed."
        sp_cands = sp_candidates.get_spcandidates(version_number, dir,
                                                  timestamp_mjd=data.timestamp_mjd)
        print "\tSingle pulse candidates parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)
        diags = diagnostics.get_diagnostics(data.obs_name, data.beam_id,
                                            data.obstype,
                                            version_number,
                                            dir)
        print "\tDiagnostics parsed."

        if debug.UPLOAD:
            upload.upload_timing_summary['Parsing'] = \
                upload.upload_timing_summary.setdefault('Parsing', 0) + \
                (time.time() - parsetime)

        # Perform the upload
        header_id = hdr.upload(db)
        for d in diags:
            d.upload(db)
        print "\tDB upload completed and checked successfully. header_id=%d" % \
            header_id
    except upload.UploadNonFatalError:
        # Parsing error caught. Job attempt has failed!
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tJob ID: %d, Job submit ID: %d\n\n" % \
            (job_submit['job_id'], job_submit['id'])
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        # Note: this variant interpolates values straight into the SQL
        # (hence the quote replacement on errormsg) instead of using the
        # '?' placeholders seen in the other upload_results() variants.
        queries = []
        queries.append("UPDATE job_submits " \
                       "SET status='upload_failed', " \
                       "details=\"%s\", " \
                       "updated_at='%s' " \
                       "WHERE id=%d" % (errormsg.replace("\"", "\'"),
                                        jobtracker.nowstr(),
                                        job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                       "details='Error while uploading results', " \
                       "updated_at='%s' " \
                       "WHERE id=%d" % (jobtracker.nowstr(),
                                        job_submit['job_id']))
        jobtracker.query(queries)

        # Rolling back changes.
        db.rollback()
    except (database.DatabaseConnectionError,
            upload.UploadDeadlockError, database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write("\tRolling back DB transaction and will re-try later.\n")
        # Rolling back changes.
        db.rollback()
def upload_results(job_submit):
    """
    Uploads results for a given submit.

        Input:
            job_submit: A row from the job_submits table.
                Results from this job submission will be uploaded.

        Output:
            None
    """
    print "Attempting to upload results"
    print "\tJob ID: %d, Job submission ID: %d" % \
        (job_submit['job_id'], job_submit['id'])

    if debug.UPLOAD:
        upload.upload_timing_summary = {}
        starttime = time.time()
    try:
        # Connect to the DB
        db = database.Database('default', autocommit=False)

        # Prepare for upload
        dir = job_submit['output_dir']
        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty for job_id=%d' % \
                (dir, job_submit['job_id'])
            raise upload.UploadNonFatalError(errormsg)

        fitsfiles = get_fitsfiles(job_submit)
        data = datafile.autogen_dataobj(fitsfiles)
        version_number = get_version_number(dir)

        if debug.UPLOAD:
            parsetime = time.time()

        # Upload results
        hdr = header.get_header(fitsfiles)
        print "\tHeader parsed."

        cands = candidates.get_candidates(version_number, dir)
        print "\tPeriodicity candidates parsed."
        sp_cands = sp_candidates.get_spcandidates(version_number, dir)
        print "\tSingle pulse candidates parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)
        diags = diagnostics.get_diagnostics(data.obs_name, data.beam_id,
                                            data.obstype,
                                            version_number,
                                            dir)
        print "\tDiagnostics parsed."

        if debug.UPLOAD:
            upload.upload_timing_summary['Parsing'] = \
                upload.upload_timing_summary.setdefault('Parsing', 0) + \
                (time.time() - parsetime)

        # Perform the upload
        header_id = hdr.upload(db)
        for d in diags:
            d.upload(db)
        print "\tEverything uploaded and checked successfully. header_id=%d" % \
            header_id
    except upload.UploadNonFatalError:
        # Parsing error caught. Job attempt has failed!
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tJob ID: %d, Job submit ID: %d\n\n" % \
            (job_submit['job_id'], job_submit['id'])
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        queries = []
        arglists = []
        queries.append("UPDATE job_submits " \
                       "SET status='upload_failed', " \
                       "details=?, " \
                       "updated_at=? " \
                       "WHERE id=?")
        arglists.append((errormsg, jobtracker.nowstr(), job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                       "details='Error while uploading results', " \
                       "updated_at=? " \
                       "WHERE id=?")
        arglists.append((jobtracker.nowstr(), job_submit['job_id']))
        jobtracker.execute(queries, arglists)

        # Rolling back changes.
        db.rollback()
    except (database.DatabaseConnectionError, CornellFTP.CornellFTPTimeout,
            upload.UploadDeadlockError, database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write("\tRolling back DB transaction and will re-try later.\n")
        # Rolling back changes.
        db.rollback()
def upload_results(dir):
    try:
        db = database.Database('default', autocommit=False)

        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty' % dir
            raise upload.UploadNonFatalError(errormsg)

        pdm_dir = os.path.join(dir, "zerodm") \
            if config.upload.upload_zerodm_periodicity else dir
        sp_dir = os.path.join(dir, "zerodm") \
            if config.upload.upload_zerodm_singlepulse else dir

        fitsfiles = glob.glob(os.path.join(dir, "*.fits"))
        data = datafile.autogen_dataobj(fitsfiles)
        version_number = JobUploader.get_version_number(dir)

        hdr = header.get_header(fitsfiles)
        print "\tHeader parsed."

        rat_inst_id_cache = ratings2.utils.RatingInstanceIDCache(dbname='common3')  #!!!!
        #rat_inst_id_cache = ratings2.utils.RatingInstanceIDCache(dbname='MichellePalfaCands')

        #cands, tempdir = candidates.get_candidates(version_number, dir)
        cands, tempdir = candidates.get_candidates(version_number, pdm_dir,
                                                   timestamp_mjd=data.timestamp_mjd,
                                                   inst_cache=rat_inst_id_cache)
        print "\tPeriodicity candidates parsed."
        #sp_cands = sp_candidates.get_spcandidates(version_number, dir)
        sp_cands, tempdir_sp = sp_candidates.get_spcandidates(version_number, sp_dir,
                                                              timestamp_mjd=data.timestamp_mjd,
                                                              inst_cache=rat_inst_id_cache)
        print "\tSingle pulse candidates parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)
        diags = diagnostics.get_diagnostics(data.obs_name, data.beam_id,
                                            data.obstype,
                                            version_number,
                                            pdm_dir, sp_dir)
        print "\tDiagnostics parsed."

        header_id = hdr.upload(db)
        for d in diags:
            d.upload(db)
        print "\tEverything uploaded and checked successfully. header_id=%d" % \
            header_id
    except upload.UploadNonFatalError:
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tResults Dir: %s\n\n" % dir
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        # Rolling back changes.
        db.rollback()
    except (database.DatabaseConnectionError, CornellFTP.CornellFTPTimeout,
            upload.UploadDeadlockError, database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write("\tRolling back DB transaction and will re-try later.\n")
        # Rolling back changes.
        db.rollback()
def upload_results(job_submit):
    """
    Uploads results for a given submit.

        Input:
            job_submit: A row from the job_submits table.
                Results from this job submission will be uploaded.

        Output:
            None
    """
    print "Attempting to upload results"
    print "\tJob ID: %d, Job submission ID: %d\n\tOutput Dir: %s" % \
        (job_submit['job_id'], job_submit['id'], job_submit['output_dir'])

    if debug.UPLOAD:
        upload.upload_timing_summary = {}
        starttime = time.time()
    try:
        # Connect to the DB
        db = database.Database('default', autocommit=False)

        # Prepare for upload
        dir = job_submit['output_dir']

        # NEW Beluga - Untar the tarball
        import tarfile
        to_keep = os.listdir(job_submit['output_dir'])
        tarball = glob.glob(job_submit['output_dir'] + '/*00.tgz')[0]
        tar = tarfile.open(tarball, 'r:gz')
        tar.extractall(path=job_submit['output_dir'])
        tar.close()
        all_files = os.listdir(job_submit['output_dir'])
        to_del = set(all_files) - set(to_keep)

        if config.upload.upload_zerodm_periodicity or config.upload.upload_zerodm_singlepulse:
            to_keep_zerodm = os.listdir(job_submit['output_dir'] + '/zerodm')
            tarball = glob.glob(job_submit['output_dir'] + '/zerodm/*zerodm.tgz')[0]
            tar = tarfile.open(tarball, 'r:gz')
            tar.extractall(path=job_submit['output_dir'] + '/zerodm')
            tar.close()
            all_files_zerodm = os.listdir(job_submit['output_dir'] + '/zerodm')
            to_del_zerodm = set(all_files_zerodm) - set(to_keep_zerodm)

        pdm_dir = os.path.join(dir, "zerodm") \
            if config.upload.upload_zerodm_periodicity else dir
        sp_dir = os.path.join(dir, "zerodm") \
            if config.upload.upload_zerodm_singlepulse else dir

        if not os.path.exists(dir) or not os.listdir(dir):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty for job_id=%d' % \
                (dir, job_submit['job_id'])
            raise upload.UploadNonFatalError(errormsg)
        elif len(os.listdir(dir)) == 1 and os.listdir(dir)[0] == 'zerodm' \
                and not os.listdir(os.path.join(dir, os.listdir(dir)[0])):
            errormsg = 'ERROR: Results directory, %s, does not exist or is empty for job_id=%d' % \
                (dir, job_submit['job_id'])
            raise upload.UploadNonFatalError(errormsg)

        fitsfiles = get_fitsfiles(job_submit)
        try:
            data = datafile.autogen_dataobj(fitsfiles)
        except ValueError:
            raise upload.UploadNonFatalError
        version_number = get_version_number(dir)

        if debug.UPLOAD:
            parsetime = time.time()

        # Upload results
        hdr = header.get_header(fitsfiles)
        print "\tHeader parsed."

        rat_inst_id_cache = ratings2.utils.RatingInstanceIDCache(dbname='common3')
        cands, tempdir = candidates.get_candidates(version_number, pdm_dir,
                                                   timestamp_mjd=data.timestamp_mjd,
                                                   inst_cache=rat_inst_id_cache)
        print "\tPeriodicity candidates parsed. (%d cands)" % len(cands)
        sp_cands, tempdir_sp = sp_candidates.get_spcandidates(version_number, sp_dir,
                                                              timestamp_mjd=data.timestamp_mjd,
                                                              inst_cache=rat_inst_id_cache)
        print "\tSingle pulse candidates parsed. (%d cands)" % len(sp_cands)

        diags = diagnostics.get_diagnostics(data.obs_name, data.beam_id,
                                            data.obstype,
                                            version_number,
                                            pdm_dir, sp_dir)
        print "\tDiagnostics parsed."

        for c in (cands + sp_cands):
            hdr.add_dependent(c)

        if debug.UPLOAD:
            upload.upload_timing_summary['Parsing'] = \
                upload.upload_timing_summary.setdefault('Parsing', 0) + \
                (time.time() - parsetime)

        # Perform the upload
        header_id = hdr.upload(db)
        print "Header ID: ", header_id
        for d in diags:
            d.upload(db)
        print "\tDB upload completed and checked successfully. header_id=%d" % \
            header_id
    except upload.UploadNonFatalError:
        # Parsing error caught. Job attempt has failed!
        exceptionmsgs = traceback.format_exception(*sys.exc_info())
        errormsg = "Error while checking results!\n"
        errormsg += "\tJob ID: %d, Job submit ID: %d\n\n" % \
            (job_submit['job_id'], job_submit['id'])
        errormsg += "".join(exceptionmsgs)

        sys.stderr.write("Error while checking results!\n")
        sys.stderr.write("Database transaction will not be committed.\n")
        sys.stderr.write("\t%s" % exceptionmsgs[-1])

        queries = []
        arglists = []
        queries.append("UPDATE job_submits " \
                       "SET status='upload_failed', " \
                       "details=?, " \
                       "updated_at=? " \
                       "WHERE id=?")
        arglists.append((errormsg, jobtracker.nowstr(), job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='failed', " \
                       "details='Error while uploading results', " \
                       "updated_at=? " \
                       "WHERE id=?")
        arglists.append((jobtracker.nowstr(), job_submit['job_id']))
        jobtracker.execute(queries, arglists)

        # Rolling back changes.
        db.rollback()
    except (database.DatabaseConnectionError, ratings2.database.DatabaseConnectionError,
            CornellFTP.CornellFTPTimeout, upload.UploadDeadlockError,
            database.DatabaseDeadlockError), e:
        # Connection error while uploading. We will try again later.
        sys.stderr.write(str(e))
        sys.stderr.write("\tRolling back DB transaction and will re-try later.\n")
        # Rolling back changes.
        db.rollback()
def __init__(self, datafns, *args, **kwargs):
    if isinstance(datafns, datafile.Data):
        self.data = datafns
    else:
        self.data = datafile.autogen_dataobj(datafns, *args, **kwargs)
def __init__(self, filenms, resultsdir, zerodm):
    # Whether or not to zero-DM the timeseries
    self.zerodm = zerodm

    # Which searches to perform
    self.search_pdm = True
    self.search_sp = True

    self.filenms = filenms
    self.filenmstr = ' '.join(self.filenms)
    # Use splitext rather than rstrip(".fits"): rstrip strips any trailing
    # characters in the set {'.', 'f', 'i', 't', 's'}, not the literal suffix.
    self.basefilenm = os.path.splitext(os.path.split(filenms[0])[1])[0]

    # Where to dump all the results.
    # Put zerodm results in a separate folder so they don't overwrite
    # the non-zerodm results
    if self.zerodm:
        self.outputdir = os.path.join(resultsdir, 'zerodm')
        self.basefilenm = self.basefilenm + '_zerodm'
    else:
        self.outputdir = resultsdir

    # Read info from PSRFITS file
    data = datafile.autogen_dataobj(self.filenms)

    spec_info = data.specinfo
    self.backend = spec_info.backend
    self.MJD = spec_info.start_MJD[0]
    self.ra_string = spec_info.ra_str
    self.dec_string = spec_info.dec_str
    self.orig_N = spec_info.N
    self.dt = spec_info.dt  # in sec
    self.BW = spec_info.BW
    self.orig_T = spec_info.T
    # Downsampling is catered to the number of samples per row.
    # self.N = psr_utils.choose_N(self.orig_N)
    self.N = self.orig_N
    self.T = self.N * self.dt
    self.nchan = spec_info.num_channels
    self.samp_per_row = spec_info.spectra_per_subint
    self.fctr = spec_info.fctr
    self.numrows = np.sum(spec_info.num_subint)

    # Debugging output
    print "JGM: RA:"
    print self.ra_string
    print "DEC: "
    print self.dec_string
    print "MJD:"
    print self.MJD
    print "T: "
    print self.T

    # Determine the average barycentric velocity of the observation
    self.baryv = get_baryv(self.ra_string, self.dec_string,
                           self.MJD, self.T, obs="NC")

    # Figure out which host we are processing on
    self.hostname = socket.gethostname()

    # The fraction of the data recommended to be masked by rfifind
    self.masked_fraction = 0.0

    # The number of candidates folded
    self.num_cands_folded = 0

    # Initialize our timers
    self.rfifind_time = 0.0
    self.downsample_time = 0.0
    self.subbanding_time = 0.0
    self.dedispersing_time = 0.0
    self.FFT_time = 0.0
    self.lo_accelsearch_time = 0.0
    self.hi_accelsearch_time = 0.0
    self.singlepulse_time = 0.0
    self.sp_grouping_time = 0.0
    self.sifting_time = 0.0
    self.folding_time = 0.0
    self.zerodm_time = 0.0
    self.total_time = 0.0

    # Initialize some candidate counters
    self.num_sifted_cands = 0
    self.num_folded_cands = 0
    self.num_single_cands = 0

    # Set dedispersion plan
    self.set_DDplan()
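# The timer attributes above (rfifind_time, FFT_time, ...) are presumably
# accumulated around each stage of the search. A minimal sketch of that
# pattern; the helper below and the example stage function in the comment
# are assumptions for illustration, not the pipeline's actual code:
import time

def timed_stage(job, attr, func, *args, **kwargs):
    """Run one processing stage and add its wall time to job.<attr>."""
    start = time.time()
    result = func(*args, **kwargs)
    setattr(job, attr, getattr(job, attr) + (time.time() - start))
    return result

# e.g. timed_stage(job, 'rfifind_time', run_rfifind, job.filenmstr)
#      where run_rfifind is a hypothetical stage function.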