def sifting_job(job):
    # Make the single-pulse plots
    basedmb = job.basefilenm+"_DM"
    basedmb_zerodm = job.basefilenm+"_zerodm_DM"
    # Note: the trailing space is intentional; it separates the glob
    # patterns when they are concatenated and later split() apart.
    basedme = ".singlepulse "
    # The following will make plots for DM ranges:
    #    0-110, 100-310, 300-1000+
    dmglobs = [basedmb+"[0-9].[0-9][0-9]"+basedme +
               basedmb+"[0-9][0-9].[0-9][0-9]"+basedme +
               basedmb+"10[0-9].[0-9][0-9]"+basedme,
               basedmb+"[12][0-9][0-9].[0-9][0-9]"+basedme +
               basedmb+"30[0-9].[0-9][0-9]"+basedme,
               basedmb+"[3-9][0-9][0-9].[0-9][0-9]"+basedme +
               basedmb+"1[0-9][0-9][0-9].[0-9][0-9]"+basedme]
    dmrangestrs = ["0-110", "100-310", "300-1000+"]
    psname = job.basefilenm+"_singlepulse.ps"
    psname_zerodm = job.basefilenm+"_zerodm_singlepulse.ps"

    if config.searching.use_zerodm_sp:
        dmglobs.extend([basedmb_zerodm+"[0-9].[0-9][0-9]"+basedme +
                        basedmb_zerodm+"[0-9][0-9].[0-9][0-9]"+basedme +
                        basedmb_zerodm+"10[0-9].[0-9][0-9]"+basedme,
                        basedmb_zerodm+"[12][0-9][0-9].[0-9][0-9]"+basedme +
                        basedmb_zerodm+"30[0-9].[0-9][0-9]"+basedme,
                        basedmb_zerodm+"[3-9][0-9][0-9].[0-9][0-9]"+basedme +
                        basedmb_zerodm+"1[0-9][0-9][0-9].[0-9][0-9]"+basedme])
        dmrangestrs.extend(["0-110_zerodm", "100-310_zerodm", "300-1000+_zerodm"])

    for dmglob, dmrangestr in zip(dmglobs, dmrangestrs):
        dmfiles = []
        for dmg in dmglob.split():
            dmfiles += glob.glob(dmg.strip())
        # Check that there are matching files and they are not all empty
        if dmfiles and sum([os.path.getsize(f) for f in dmfiles]):
            cmd = 'single_pulse_search.py -t %f -g "%s"' % \
                  (config.searching.singlepulse_plot_SNR, dmglob)
            job.singlepulse_time += timed_execute(cmd)
            if dmrangestr.endswith("zerodm"):
                os.rename(psname_zerodm,
                          job.basefilenm+"_DMs%s_singlepulse.ps" % dmrangestr)
            else:
                os.rename(psname,
                          job.basefilenm+"_DMs%s_singlepulse.ps" % dmrangestr)

    # Compute a binary summary of all SP candidates
    cmd = "singlepulse2bin -q -b %s" % job.basefilenm
    job.singlepulse_time += timed_execute(cmd)

    # Recompute the DM list
    dmstrs = []
    for ddplan in job.ddplans:
        for passnum in range(ddplan.numpasses):
            for dmstr in ddplan.dmlist[passnum]:
                dmstrs.append(dmstr)

    # Sift through the candidates to choose the best to fold
    job.sifting_time = time.time()

    # TODO
    hi_accel_cands = []  # Guards against an unbound name if split is 0
    for ipart in range(config.searching.split):
        tmp_accel_cands = sifting.read_candidates(glob.glob("*part%dx*ACCEL_%d" %
                                (ipart, config.searching.hi_accel_zmax)))
        if len(tmp_accel_cands):
            tmp_accel_cands = sifting.remove_duplicate_candidates(tmp_accel_cands)
        if len(tmp_accel_cands):
            tmp_accel_cands = sifting.remove_DM_problems(tmp_accel_cands,
                                    config.searching.numhits_to_fold,
                                    dmstrs, config.searching.low_DM_cutoff)
        if ipart:
            hi_accel_cands += tmp_accel_cands
        else:
            hi_accel_cands = tmp_accel_cands

    lo_accel_cands = sifting.read_candidates(glob.glob("*ACCEL_%d" %
                            config.searching.lo_accel_zmax))
    if len(lo_accel_cands):
        lo_accel_cands = sifting.remove_duplicate_candidates(lo_accel_cands)
    if len(lo_accel_cands):
        lo_accel_cands = sifting.remove_DM_problems(lo_accel_cands,
                                config.searching.numhits_to_fold,
                                dmstrs, config.searching.low_DM_cutoff)

    job.all_accel_cands = lo_accel_cands + hi_accel_cands
    if len(job.all_accel_cands):
        job.all_accel_cands = sifting.remove_duplicate_candidates(job.all_accel_cands)
        job.all_accel_cands = sifting.remove_harmonics(job.all_accel_cands)
    # Note: the candidates will be sorted in _sigma_ order, not _SNR_!
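    # (sifting.cmp_sigma sorts by decreasing sigma, the equivalent Gaussian
    # significance reported by accelsearch, so the most significant
    # candidates come first in the list and are considered for folding first.)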
    job.all_accel_cands.sort(sifting.cmp_sigma)
    print "Sending candlist to stdout before writing to file"
    sifting.write_candlist(job.all_accel_cands)
    sys.stdout.flush()
    sifting.write_candlist(job.all_accel_cands, job.basefilenm+".accelcands")

    # Make sifting summary plots
    job.all_accel_cands.plot_goodbad()
    plt.title("%s Rejected Cands" % job.basefilenm)
    plt.savefig(job.basefilenm+".accelcands.rejects.png")
    job.all_accel_cands.plot_summary()
    plt.title("%s Periodicity Summary" % job.basefilenm)
    plt.savefig(job.basefilenm+".accelcands.summary.png")

    # Write out sifting candidate summary
    job.all_accel_cands.print_cand_summary(job.basefilenm+".accelcands.summary")
    # Write out sifting comprehensive report of bad candidates
    job.all_accel_cands.write_cand_report(job.basefilenm+".accelcands.report")
    # Parenthesize the filename: the original relied on % binding tighter
    # than +, which only worked because %s ended the format string
    timed_execute("gzip -f --best %s" % (job.basefilenm+".accelcands.report"))

    # Moving of results to resultsdir now happens in clean_up(...)
    # shutil.copy(job.basefilenm+".accelcands", job.outputdir)

    job.sifting_time = time.time() - job.sifting_time
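
# The DM-range globs used above are compact but cryptic.  As an illustrative
# reference (a hypothetical helper, not called anywhere in this pipeline),
# the ranges each glob set is intended to cover can be written as:

def _dm_plot_ranges(dm):
    """Return the single-pulse plot range label(s) a trial DM falls into.

    Illustrative only: mirrors the intent of the dmglobs patterns built in
    sifting_job() and search_job().  DMs near 100 and 300 deliberately land
    in two ranges, just as the glob patterns overlap at those boundaries.
    """
    labels = []
    if dm < 110.0:
        labels.append("0-110")
    if 100.0 <= dm < 310.0:
        labels.append("100-310")
    if dm >= 300.0:
        labels.append("300-1000+")
    return labels
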
def search_job(job):
    """Search the observation defined in the obs_info instance 'job'.
    """
    # Use whatever .zaplist is found in the current directory
    zaplist = glob.glob("*.zaplist")[0]
    print "Using %s as zaplist" % zaplist

    if config.searching.use_subbands and config.searching.fold_rawdata:
        # Make a directory to keep the subbands so they can be used to fold later
        try:
            os.makedirs(os.path.join(job.workdir, 'subbands'))
        except:
            pass

    # rfifind the data file
    cmd = "rfifind %s -time %.17g -o %s %s" % \
          (config.searching.datatype_flag,
           config.searching.rfifind_chunk_time,
           job.basefilenm, job.filenmstr)
    job.rfifind_time += timed_execute(cmd, stdout="%s_rfifind.out" % job.basefilenm)
    maskfilenm = job.basefilenm + "_rfifind.mask"
    # Find the fraction that was suggested to be masked
    # Note: Should we stop processing if the fraction is
    #       above some large value? Maybe 30%?
    job.masked_fraction = find_masked_fraction(job)

    # Iterate over the stages of the overall de-dispersion plan
    dmstrs = []
    for ddplan in job.ddplans:
        # Iterate over the individual passes through the data file
        for passnum in range(ddplan.numpasses):
            subbasenm = "%s_DM%s" % (job.basefilenm, ddplan.subdmlist[passnum])

            if config.searching.use_subbands:
                try:
                    os.makedirs(os.path.join(job.tempdir, 'subbands'))
                except:
                    pass

                # Create a set of subbands
                cmd = "prepsubband %s -sub -subdm %s -downsamp %d -nsub %d -mask %s " \
                      "-o %s/subbands/%s %s" % \
                      (config.searching.datatype_flag, ddplan.subdmlist[passnum],
                       ddplan.sub_downsamp, ddplan.numsub, maskfilenm,
                       job.tempdir, job.basefilenm, job.filenmstr)
                job.subbanding_time += timed_execute(cmd, stdout="%s.subout" % subbasenm)

                # Now de-disperse using the subbands
                cmd = "prepsubband -lodm %.2f -dmstep %.2f -numdms %d -downsamp %d " \
                      "-nsub %d -numout %d -o %s/%s %s/subbands/%s.sub[0-9]*" % \
                      (ddplan.lodm+passnum*ddplan.sub_dmstep, ddplan.dmstep,
                       ddplan.dmsperpass, ddplan.dd_downsamp, ddplan.numsub,
                       psr_utils.choose_N(job.orig_N/ddplan.downsamp),
                       job.tempdir, job.basefilenm, job.tempdir, subbasenm)
                job.dedispersing_time += timed_execute(cmd, stdout="%s.prepout" % subbasenm)

                if config.searching.use_zerodm_sp or config.searching.use_zerodm_accel:
                    cmd = "prepsubband -lodm %.2f -dmstep %.2f -numdms %d -downsamp %d " \
                          "-nsub %d -numout %d -zerodm -o %s/%s_zerodm %s/subbands/%s.sub[0-9]*" % \
                          (ddplan.lodm+passnum*ddplan.sub_dmstep, ddplan.dmstep,
                           ddplan.dmsperpass, ddplan.dd_downsamp, ddplan.numsub,
                           psr_utils.choose_N(job.orig_N/ddplan.downsamp),
                           job.tempdir, job.basefilenm, job.tempdir, subbasenm)
                    job.dedispersing_time += timed_execute(cmd, stdout="%s.prepout" % subbasenm)
            else:  # Not using subbands
                cmd = "prepsubband -mask %s -lodm %.2f -dmstep %.2f -numdms %d -downsamp %d " \
                      "-numout %d -nsub %d -o %s/%s %s" % \
                      (maskfilenm, ddplan.lodm+passnum*ddplan.sub_dmstep, ddplan.dmstep,
                       ddplan.dmsperpass, ddplan.dd_downsamp*ddplan.sub_downsamp,
                       psr_utils.choose_N(job.orig_N/ddplan.downsamp), ddplan.numsub,
                       job.tempdir, job.basefilenm, job.filenmstr)
                job.dedispersing_time += timed_execute(cmd)

            # Iterate over all the new DMs
            for dmstr in ddplan.dmlist[passnum]:
                dmstrs.append(dmstr)
                basenm = os.path.join(job.tempdir, job.basefilenm+"_DM"+dmstr)
                basenm_zerodm = os.path.join(job.tempdir, job.basefilenm+"_zerodm_DM"+dmstr)
                datnm = basenm+".dat"
                datnm_zerodm = basenm_zerodm+".dat"
                fftnm = basenm+".fft"
                infnm = basenm+".inf"

                # Do the single-pulse search
                cmd = "single_pulse_search.py -p -m %f -t %f %s" % \
                      (config.searching.singlepulse_maxwidth,
                       config.searching.singlepulse_threshold, datnm)
                job.singlepulse_time += timed_execute(cmd)
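                # Flags used above (per PRESTO's single_pulse_search.py):
                # -p suppresses the per-DM plot, -m sets the maximum pulse
                # width to search (in seconds), and -t is the S/N threshold.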
                try:
                    shutil.move(basenm+".singlepulse", job.workdir)
                except:
                    pass

                if config.searching.use_zerodm_sp:
                    cmd = "single_pulse_search.py -p -m %f -t %f %s" % \
                          (config.searching.singlepulse_maxwidth,
                           config.searching.singlepulse_threshold, datnm_zerodm)
                    job.singlepulse_time += timed_execute(cmd)
                    try:
                        shutil.move(basenm_zerodm+".singlepulse", job.workdir)
                    except:
                        pass

                # FFT, zap, and de-redden
                cmd = "realfft %s" % datnm
                job.FFT_time += timed_execute(cmd)
                cmd = "zapbirds -zap -zapfile %s -baryv %.6g %s" % \
                      (zaplist, job.baryv, fftnm)
                job.FFT_time += timed_execute(cmd)
                cmd = "rednoise %s" % fftnm
                job.FFT_time += timed_execute(cmd)
                try:
                    os.rename(basenm+"_red.fft", fftnm)
                except:
                    pass

                # Do the low-acceleration search
                cmd = "accelsearch -harmpolish -numharm %d -sigma %f " \
                      "-zmax %d -flo %f %s" % \
                      (config.searching.lo_accel_numharm,
                       config.searching.lo_accel_sigma,
                       config.searching.lo_accel_zmax,
                       config.searching.lo_accel_flo, fftnm)
                job.lo_accelsearch_time += timed_execute(cmd)
                try:
                    os.remove(basenm+"_ACCEL_%d.txtcand" % config.searching.lo_accel_zmax)
                except:
                    pass
                try:  # This prevents errors if there are no cand files to copy
                    shutil.move(basenm+"_ACCEL_%d.cand" % config.searching.lo_accel_zmax,
                                job.workdir)
                    shutil.move(basenm+"_ACCEL_%d" % config.searching.lo_accel_zmax,
                                job.workdir)
                except:
                    pass

                # Do the high-acceleration search
                cmd = "accelsearch -harmpolish -numharm %d -sigma %f " \
                      "-zmax %d -flo %f %s" % \
                      (config.searching.hi_accel_numharm,
                       config.searching.hi_accel_sigma,
                       config.searching.hi_accel_zmax,
                       config.searching.hi_accel_flo, fftnm)
                job.hi_accelsearch_time += timed_execute(cmd)
                try:
                    os.remove(basenm+"_ACCEL_%d.txtcand" % config.searching.hi_accel_zmax)
                except:
                    pass
                try:  # This prevents errors if there are no cand files to copy
                    shutil.move(basenm+"_ACCEL_%d.cand" % config.searching.hi_accel_zmax,
                                job.workdir)
                    shutil.move(basenm+"_ACCEL_%d" % config.searching.hi_accel_zmax,
                                job.workdir)
                except:
                    pass

                # Move the .inf files
                try:
                    shutil.move(infnm, job.workdir)
                except:
                    pass
                # Remove the .dat and .fft files
                try:
                    os.remove(datnm)
                except:
                    pass
                try:
                    os.remove(fftnm)
                except:
                    pass

    if config.searching.use_subbands:
        if config.searching.fold_rawdata:
            # Subband files are no longer needed
            shutil.rmtree(os.path.join(job.tempdir, 'subbands'))
        else:
            # Move subbands to workdir
            for sub in glob.glob(os.path.join(job.tempdir, 'subbands', "*")):
                shutil.move(sub, os.path.join(job.workdir, 'subbands'))

    # Make the single-pulse plots
    basedmb = job.basefilenm+"_DM"
    basedmb_zerodm = job.basefilenm+"_zerodm_DM"
    basedme = ".singlepulse "
    # The following will make plots for DM ranges:
    #    0-110, 100-310, 300-1000+
    dmglobs = [basedmb+"[0-9].[0-9][0-9]"+basedme +
               basedmb+"[0-9][0-9].[0-9][0-9]"+basedme +
               basedmb+"10[0-9].[0-9][0-9]"+basedme,
               basedmb+"[12][0-9][0-9].[0-9][0-9]"+basedme +
               basedmb+"30[0-9].[0-9][0-9]"+basedme,
               basedmb+"[3-9][0-9][0-9].[0-9][0-9]"+basedme +
               basedmb+"1[0-9][0-9][0-9].[0-9][0-9]"+basedme]
    dmrangestrs = ["0-110", "100-310", "300-1000+"]
    psname = job.basefilenm+"_singlepulse.ps"
    psname_zerodm = job.basefilenm+"_zerodm_singlepulse.ps"

    if config.searching.use_zerodm_sp:
        dmglobs.extend([basedmb_zerodm+"[0-9].[0-9][0-9]"+basedme +
                        basedmb_zerodm+"[0-9][0-9].[0-9][0-9]"+basedme +
                        basedmb_zerodm+"10[0-9].[0-9][0-9]"+basedme,
                        basedmb_zerodm+"[12][0-9][0-9].[0-9][0-9]"+basedme +
                        basedmb_zerodm+"30[0-9].[0-9][0-9]"+basedme,
                        basedmb_zerodm+"[3-9][0-9][0-9].[0-9][0-9]"+basedme +
                        basedmb_zerodm+"1[0-9][0-9][0-9].[0-9][0-9]"+basedme])
        dmrangestrs.extend(["0-110_zerodm", "100-310_zerodm", "300-1000+_zerodm"])
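    # Each entry in dmglobs is a space-separated set of glob patterns.  The
    # loop below passes the whole set to single_pulse_search.py via -g, so
    # one summary plot is produced per DM range (plus zero-DM variants when
    # use_zerodm_sp is enabled).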
"100-310_zerodm", "300-1000+_zerodm"]) for dmglob, dmrangestr in zip(dmglobs, dmrangestrs): dmfiles = [] for dmg in dmglob.split(): dmfiles += glob.glob(dmg.strip()) # Check that there are matching files and they are not all empty if dmfiles and sum([os.path.getsize(f) for f in dmfiles]): cmd = 'single_pulse_search.py -t %f -g "%s"' % \ (config.searching.singlepulse_plot_SNR, dmglob) job.singlepulse_time += timed_execute(cmd) if dmrangestr.endswith("zerodm"): os.rename(psname_zerodm, job.basefilenm+"_DMs%s_singlepulse.ps" % dmrangestr) else: os.rename(psname, job.basefilenm+"_DMs%s_singlepulse.ps" % dmrangestr) # Sift through the candidates to choose the best to fold job.sifting_time = time.time() lo_accel_cands = sifting.read_candidates(glob.glob("*ACCEL_%d" % config.searching.lo_accel_zmax)) if len(lo_accel_cands): lo_accel_cands = sifting.remove_duplicate_candidates(lo_accel_cands) if len(lo_accel_cands): lo_accel_cands = sifting.remove_DM_problems(lo_accel_cands, config.searching.numhits_to_fold, dmstrs, config.searching.low_DM_cutoff) hi_accel_cands = sifting.read_candidates(glob.glob("*ACCEL_%d" % config.searching.hi_accel_zmax)) if len(hi_accel_cands): hi_accel_cands = sifting.remove_duplicate_candidates(hi_accel_cands) if len(hi_accel_cands): hi_accel_cands = sifting.remove_DM_problems(hi_accel_cands, config.searching.numhits_to_fold, dmstrs, config.searching.low_DM_cutoff) all_accel_cands = lo_accel_cands + hi_accel_cands if len(all_accel_cands): all_accel_cands = sifting.remove_harmonics(all_accel_cands) # Note: the candidates will be sorted in _sigma_ order, not _SNR_! all_accel_cands.sort(sifting.cmp_sigma) print "Sending candlist to stdout before writing to file" sifting.write_candlist(all_accel_cands) sys.stdout.flush() sifting.write_candlist(all_accel_cands, job.basefilenm+".accelcands") # Make sifting summary plots all_accel_cands.plot_goodbad() plt.title("%s Rejected Cands" % job.basefilenm) plt.savefig(job.basefilenm+".accelcands.rejects.png") all_accel_cands.plot_summary() plt.title("%s Periodicity Summary" % job.basefilenm) plt.savefig(job.basefilenm+".accelcands.summary.png") # Write out sifting candidate summary all_accel_cands.print_cand_summary(job.basefilenm+".accelcands.summary") # Write out sifting comprehensive report of bad candidates all_accel_cands.write_cand_report(job.basefilenm+".accelcands.report") timed_execute("gzip --best %s" % job.basefilenm+".accelcands.report") # Moving of results to resultsdir now happens in clean_up(...) 
    # shutil.copy(job.basefilenm+".accelcands", job.outputdir)

    job.sifting_time = time.time() - job.sifting_time

    #####
    # Print some info useful for debugging
    print "Contents of workdir (%s) before folding: " % job.workdir
    for fn in os.listdir(job.workdir):
        print "    %s" % fn
    print "Contents of resultsdir (%s) before folding: " % job.outputdir
    for fn in os.listdir(job.outputdir):
        print "    %s" % fn
    print "Contents of job.tempdir (%s) before folding: " % job.tempdir
    for fn in os.listdir(job.tempdir):
        print "    %s" % fn
    sys.stdout.flush()
    #####

    # Fold the best candidates
    cands_folded = 0
    for cand in all_accel_cands:
        print "At cand %s" % str(cand)
        if cands_folded == config.searching.max_cands_to_fold:
            break
        if cand.sigma >= config.searching.to_prepfold_sigma:
            print "...folding"
            job.folding_time += timed_execute(get_folding_command(cand, job))
            cands_folded += 1
    job.num_cands_folded = cands_folded

    # Rate candidates
    timed_execute("rate_pfds.py --redirect-warnings --include-all -x pulse_width *.pfd")
    sys.stdout.flush()

    # Calculate some candidate attributes from pfds
    attrib_file = open('candidate_attributes.txt', 'w')
    for pfdfn in glob.glob("*.pfd"):
        attribs = {}
        pfd = prepfold.pfd(pfdfn)
        red_chi2 = pfd.bestprof.chi_sqr
        dof = pfd.proflen - 1
        attribs['prepfold_sigma'] = \
            -scipy.stats.norm.ppf(scipy.stats.chi2.sf(red_chi2*dof, dof))
        off_red_chi2 = pfd.estimate_offsignal_redchi2()
        new_red_chi2 = red_chi2 / off_red_chi2
        # prepfold sigma rescaled to deal with chi-squared suppression,
        # a problem when strong RFI is present
        attribs['rescaled_prepfold_sigma'] = \
            -scipy.stats.norm.ppf(scipy.stats.chi2.sf(new_red_chi2*dof, dof))
        for key in attribs:
            attrib_file.write("%s\t%s\t%.3f\n" % (pfdfn, key, attribs[key]))
    attrib_file.close()

    # Print some info useful for debugging
    print "Contents of workdir (%s) after folding: " % job.workdir
    for fn in os.listdir(job.workdir):
        print "    %s" % fn
    print "Contents of resultsdir (%s) after folding: " % job.outputdir
    for fn in os.listdir(job.outputdir):
        print "    %s" % fn
    print "Contents of job.tempdir (%s) after folding: " % job.tempdir
    for fn in os.listdir(job.tempdir):
        print "    %s" % fn
    sys.stdout.flush()

    #####
    # Now step through the .ps files and convert them to .png and gzip them
    psfiles = glob.glob("*.ps")
    for psfile in psfiles:
        # The '[0]' appended to psfile makes convert process only the 1st page
        timed_execute("convert -quality 90 %s -background white -flatten -rotate 90 +matte %s" %
                      (psfile+"[0]", psfile[:-3]+".png"))
        timed_execute("gzip "+psfile)

    # Print some info useful for debugging
    print "Contents of workdir (%s) after conversion: " % job.workdir
    for fn in os.listdir(job.workdir):
        print "    %s" % fn
    print "Contents of resultsdir (%s) after conversion: " % job.outputdir
    for fn in os.listdir(job.outputdir):
        print "    %s" % fn
    print "Contents of job.tempdir (%s) after conversion: " % job.tempdir
    for fn in os.listdir(job.tempdir):
        print "    %s" % fn
    sys.stdout.flush()
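
# For reference, the candidate-attribute calculation above converts a folded
# profile's reduced chi-squared into an equivalent one-sided Gaussian
# significance.  A minimal standalone sketch with made-up numbers (assuming
# only that scipy is available):
#
#     >>> import scipy.stats
#     >>> red_chi2, dof = 2.5, 63     # reduced chi2; dof is proflen - 1
#     >>> p = scipy.stats.chi2.sf(red_chi2*dof, dof)   # chance probability
#     >>> -scipy.stats.norm.ppf(p)    # equivalent sigma (roughly 6 here)
#
# The rescaled variant first divides by the off-pulse reduced chi-squared
# from pfd.estimate_offsignal_redchi2(), which compensates for the
# chi-squared suppression seen when strong RFI is present.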