def remove_bad_subints(infn, badsubints=None, badsubint_intervals=None):
    """Zero-weight bad subints.
        The file is modified in-place. However, zero-weighting
        is used for trimming, so the process is reversible.

        Note: Subints are indexed starting at 0.

        Inputs:
            infn: name of file to remove subints from.
            badsubints: A list of subints to remove
            badsubint_intervals: A list of subint intervals
                (inclusive) to remove

        Outputs:
            None
    """
    if badsubints is None:
        badsubints = config.cfg.badsubints
    if badsubint_intervals is None:
        badsubint_intervals = config.cfg.badsubint_intervals

    zaplets = []
    if badsubints:
        zaplets.append("-w '%s'" % " ".join(['%d' % ww for ww in badsubints]))
    if badsubint_intervals:
        zaplets.extend(["-W '%d %d'" % lohi for lohi in badsubint_intervals])

    if zaplets:
        utils.print_info("Removing bad subints.", 2)
        utils.execute("paz -m %s %s" % (" ".join(zaplets), infn.fn))
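# Illustrative sketch (not part of the pipeline): for hypothetical inputs
# badsubints=[3, 7] and badsubint_intervals=[(10, 14)], the zaplet strings
# assembled above combine into a 'paz' call of the following form. The file
# name 'example.ar' is a placeholder.
zaplets_example = ["-w '%s'" % " ".join('%d' % ww for ww in [3, 7]),
                   "-W '%d %d'" % (10, 14)]
print("paz -m %s %s" % (" ".join(zaplets_example), "example.ar"))
# -> paz -m -w '3 7' -W '10 14' example.ar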
def get_obslog_entry(arf, tolerant=False):
    """Given an archive file, find the entry in the observing log.

        Inputs:
            arf: ArchiveFile object.
            tolerant: Be tolerant with name matching.
                This is important for flux-cal observations.
                (Default: False)

        Output:
            obsinfo: A dictionary of observing information.
    """
    obsdt_utc, names = __prep_obslog_search(arf, tolerant)
    logentries = __obslog_db_match(obsdt_utc, names)
    if not logentries:
        utils.print_info(
            'No matches found in obslog DB. Searching text files.', 1)
        logentries = __obslog_file_match(obsdt_utc, names)

    if len(logentries) != 1:
        msg = "Bad number (%d) of entries " \
              "in obslog with correct source name (%s) " \
              "close to observation (%s) start time (UTC: %s)" % \
              (len(logentries), arf['name'], arf.fn,
               obsdt_utc.strftime('%c'))
        if len(logentries) > 1:
            msg += ":\n%s" % \
                "\n".join([pprint.pformat(entry) for entry in logentries])
        raise errors.HeaderCorrectionError(msg)
    return logentries[0]
def main():
    if args.file_ids:
        rows = get_files_by_id(args.file_ids)
    else:
        rows = get_files(args.psrnames, args.type)
    info = {}
    utils.sort_by_keys(rows, args.sortkeys)
    for row in rows:
        if args.fmt is not None:
            print args.fmt.decode('string-escape') % row
        else:
            print os.path.join(row['filepath'], row['filename'])
            utils.print_info(
                "    File ID: %(file_id)d; "
                "Obs ID: %(obs_id)d; "
                "Status: %(status)s; "
                "Stage: %(stage)s; "
                "QC passed: %(qcpassed)s" % row, 2)
        info['Total'] = info.get('Total', 0) + 1
        info['QC Passed'] = info.get('QC Passed', 0) + int(
            bool(row['qcpassed']))
        info['Status %s' % row['status']] = \
            info.get('Status %s' % row['status'], 0) + 1
        info['Stage %s' % row['stage']] = \
            info.get('Stage %s' % row['stage'], 0) + 1
    utils.print_info(
        "Summary:\n    %s" %
        "\n    ".join(["%s: %d" % xx for xx in info.iteritems()]), 1)
def prepare_subints(subdirs, subints, baseoutdir, trimpcnt=6.25, effix=False,
                    backend=None):
    """Prepare subints by
           - Copying them to the temporary working directory
           - De-weighting a percentage from each sub-band edge
           - Converting archive format to PSRFITS

        Inputs:
            subdirs: List of sub-band directories containing
                sub-ints to combine
            subints: List of subint files to be combined.
                (NOTE: These are the file name only (i.e. no path)
                    Each file listed should appear in each of the subdirs.)
            baseoutdir: Directory containing the sub-directories
                of prepared files.
            trimpcnt: Percentage (i.e. between 0-100) of subband to trim
                from _each_ edge of the band. (Default: 6.25%)
            effix: Change observation site to eff_psrix to correct
                for asterix clock offsets. (Default: False)
            backend: Name of the backend. (Default: leave as is)

        Outputs:
            prepsubdirs: The sub-directories containing prepared files.
    """
    devnull = open(os.devnull)
    tmpsubdirs = []
    for subdir in utils.show_progress(subdirs, width=50):
        freqdir = os.path.split(os.path.abspath(subdir))[-1]
        freqdir = os.path.join(baseoutdir, freqdir)
        try:
            os.makedirs(freqdir)
        except OSError:
            # Directory already exists
            pass
        fns = [os.path.join(subdir, fn) for fn in subints]
        preproc = 'convert psrfits'
        if effix:
            preproc += ',edit site=eff_psrix'
        if backend:
            if ("," in backend) or ("=" in backend) or (' ' in backend):
                raise errors.UnrecognizedValueError("Backend value (%s) is "
                                                    "invalid. It cannot "
                                                    "contain ',' or '=' or "
                                                    "' '" % backend)
            preproc += ',edit be:name=%s' % backend
        utils.execute(
            ['paz', '-j', preproc, '-E', '%f' % trimpcnt, '-O', freqdir] + fns,
            stderr=devnull)
        tmpsubdirs.append(freqdir)
    utils.print_info(
        "Prepared %d subint fragments in %d freq sub-dirs" %
        (len(subints), len(subdirs)), 3)
    return tmpsubdirs
def __prune_band_edges(self, ar):
    """Prune the edges of the band. This is useful for
        removing channels where there is no response.
        The file is modified in-place. However, zero-weighting
        is used for pruning, so the process is reversible.

        Inputs:
            ar: The psrchive archive object to clean.

        Outputs:
            None
    """
    if self.configs.response is None:
        utils.print_info('No freq range specified for band pruning. '
                         'Skipping...', 2)
    else:
        lofreq, hifreq = self.configs.response
        # Use absolute value in case band is flipped (BW < 0)
        # bw = ar.get_bandwidth()  # assigned but never used
        nchan = ar.get_nchan()
        # chanbw = bw/nchan  # assigned but never used
        utils.print_info('Pruning frequency band to (%g-%g MHz)' %
                         (lofreq, hifreq), 2)
        # Loop over channels
        for ichan in range(nchan):
            # Get profile for subint=0, pol=0
            prof = ar.get_Profile(0, 0, ichan)
            freq = prof.get_centre_frequency()
            if (freq < lofreq) or (freq > hifreq):
                clean_utils.zero_weight_chan(ar, ichan)
def combine_all(infns, outfn, expected_nsubbands=None):
    """Given a list of ArchiveFile objects group them into sub-bands
        then remove the edges of each sub-band to remove the artifacts
        caused by aliasing. Finally, combine the sub-bands into a single
        output file. The combined sub-band files are not saved.

        Inputs:
            infns: A list of input ArchiveFile objects.
            outfn: The output file's name.
            expected_nsubbands: The expected number of subbands
                for each subintegration.

        Outputs:
            combinedfns: A list of output (combined) files.
    """
    if expected_nsubbands is None:
        expected_nsubbands = config.cfg.expected_nsubbands

    infns = check_files(infns, expected_nsubbands=expected_nsubbands)
    groups = group_files(infns)
    combinedfiles = []
    # Combine files from the same sub-band in the time direction
    for group in groups:
        subbands = []
        for ctr_freq, to_combine in utils.group_by_ctr_freq(group).iteritems():
            utils.print_info("Combining %d subints at ctr freq %d MHz" %
                             (len(to_combine), ctr_freq), 3)
            # Combine sub-integrations for this sub-band
            subfn = utils.get_outfn(outfn + ".%(freq)dMHz", to_combine[0])
            if subfn in [f.fn for f in subbands]:
                warnings.warn("'combine_all(...)' is overwriting files it "
                              "previously created!")
            subband = combine_subints(to_combine, subfn)
            clean.trim_edge_channels(subband)
            subbands.append(subband)
        combinedfn = utils.get_outfn(outfn, subbands[0])
        utils.print_info("Combining %d subbands into %s" %
                         (len(subbands), combinedfn), 3)
        if combinedfn in [f.fn for f in combinedfiles]:
            warnings.warn("'combine_all(...)' is overwriting files it "
                          "previously created!")
        combinedfile = combine_subbands(subbands, combinedfn)
        combinedfiles.append(combinedfile)

        if not config.debug.INTERMEDIATE:
            # Remove the temporary combined files
            for sub in subbands:
                os.remove(sub.fn)
    return combinedfiles
def remove_bad_channels(infn, badchans=None, badchan_intervals=None,
                        badfreqs=None, badfreq_intervals=None):
    """Zero-weight bad channels and channels containing bad frequencies.
        The file is modified in-place. However, zero-weighting is used
        for trimming, so the process is reversible.

        Note: Channels are indexed starting at 0.

        Inputs:
            infn: name of file to remove channels from.
            badchans: A list of channels to remove
            badchan_intervals: A list of channel intervals
                (inclusive) to remove
            badfreqs: A list of frequencies. The channels containing
                these frequencies will be removed.
            badfreq_intervals: A list of frequency ranges to remove.
                The channels containing these frequencies will be removed.

        Outputs:
            None
    """
    if badchans is None:
        badchans = config.cfg.badchans
    if badchan_intervals is None:
        badchan_intervals = config.cfg.badchan_intervals
    if badfreqs is None:
        badfreqs = config.cfg.badfreqs
    if badfreq_intervals is None:
        badfreq_intervals = config.cfg.badfreq_intervals

    zaplets = []
    if badchans:
        zaplets.append("-z '%s'" % " ".join(['%d' % zz for zz in badchans]))
    if badchan_intervals:
        zaplets.extend(["-Z '%d %d'" % lohi for lohi in badchan_intervals])
    if badfreqs:
        zaplets.append("-f '%s'" % " ".join(['%f' % ff for ff in badfreqs]))
    if badfreq_intervals:
        zaplets.extend(["-F '%f %f'" % lohi for lohi in badfreq_intervals])

    if zaplets:
        utils.print_info("Removing bad channels.", 2)
        utils.execute("paz -m %s %s" % (" ".join(zaplets), infn.fn))
def prune_band(infn, response=None):
    """Prune the edges of the band. This is useful for removing
        channels where there is no response.
        The file is modified in-place. However, zero-weighting
        is used for pruning, so the process is reversible.

        Inputs:
            infn: name of file to trim.
            response: A tuple specifying the range of frequencies
                outside of which should be de-weighted.

        Outputs:
            None
    """
    if response is None:
        response = config.cfg.rcvr_response_lims

    if response is None:
        utils.print_info(
            "No freq range specified for band pruning. Skipping...", 2)
    else:
        # Use absolute value in case band is flipped (BW < 0)
        lofreq = infn['freq'] - np.abs(0.5 * infn['bw'])
        hifreq = infn['freq'] + np.abs(0.5 * infn['bw'])
        utils.print_info("Pruning frequency band to (%g-%g MHz)" % response, 2)
        utils.print_debug("Archive's freq band (%g-%g MHz)" %
                          (lofreq, hifreq), 'clean')
        pazcmd = 'paz -m %s ' % infn.fn
        runpaz = False  # Only run paz if either of the following clauses are True
        if response[0] > lofreq:
            # Part of archive's low freqs are outside rcvr's response
            pazcmd += '-F "%f %f" ' % (lofreq, response[0])
            runpaz = True
        if response[1] < hifreq:
            # Part of archive's high freqs are outside rcvr's response
            pazcmd += '-F "%f %f" ' % (response[1], hifreq)
            runpaz = True
        if runpaz:
            utils.execute(pazcmd)
        else:
            warnings.warn("Not pruning band edges! All data are "
                          "within the receiver's response.",
                          errors.CoastGuardWarning)
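# Illustrative sketch (assumed numbers, not from the pipeline): an archive
# centred at 1400 MHz with 200 MHz bandwidth spans 1300-1500 MHz. With a
# hypothetical receiver response of (1340, 1480) MHz, prune_band would add
# the two '-F' ranges printed below to the paz command.
import numpy as np
freq, bw = 1400.0, 200.0            # hypothetical header values
response = (1340.0, 1480.0)         # hypothetical rcvr_response_lims
lofreq = freq - np.abs(0.5 * bw)
hifreq = freq + np.abs(0.5 * bw)
if response[0] > lofreq:
    print('-F "%f %f"' % (lofreq, response[0]))     # 1300-1340 MHz de-weighted
if response[1] < hifreq:
    print('-F "%f %f"' % (response[1], hifreq))     # 1480-1500 MHz de-weighted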
def get_files_to_combine(rows, max_span=1, min_snr=0):
    """Given a list of result sets from the database return
        a list of filenames to combine to make a template.

        Inputs:
            rows: A list of database result sets as returned
                by 'get_files()'.
            max_span: The maximum allowable span, in days, from
                the first data file to the last data file to combine.
                (Default: 1 day)
            min_snr: Ignore data files with SNR lower than this value.
                (Default: 0)

        Output:
            files: A list of file names to combine.
    """
    utils.sort_by_keys(rows, ['start_mjd'])
    info = []
    for ii, row in enumerate(rows):
        jj = ii
        tot = 0
        for jj in range(ii, len(rows)):
            if (rows[jj]['start_mjd'] - row['start_mjd']) > max_span:
                break
            snr = (rows[jj]['snr'] or 0)  # This will replace None values with 0
            if snr >= min_snr:
                tot += snr
            jj += 1
        info.append((ii, tot, jj - ii))
    if not info:
        return []
    ind, snr, nn = max(info, key=lambda aa: aa[1])
    utils.print_info(
        "Highest total SNR is %g for %d files starting "
        "at index %d." % (snr, nn, ind), 2)
    touse = rows[ind:ind + nn]
    utils.sort_by_keys(touse, ['snr_r'])
    return [
        os.path.join(rr['filepath'], rr['filename']) for rr in touse
        if (rr['snr'] or 0) >= min_snr
    ]
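# Toy illustration (hypothetical rows, not real data): with max_span=1 day the
# window starting at index 1 has the highest summed SNR (50 + 45 = 95), so the
# two files in that window would be returned, assuming the 'snr_r' sort key
# orders by descending SNR.
toy_rows = [{'start_mjd': 58000.0, 'snr': 20, 'filepath': '/a', 'filename': 'f0'},
            {'start_mjd': 58000.5, 'snr': 50, 'filepath': '/a', 'filename': 'f1'},
            {'start_mjd': 58001.2, 'snr': 45, 'filepath': '/a', 'filename': 'f2'},
            {'start_mjd': 58003.0, 'snr': 10, 'filepath': '/a', 'filename': 'f3'}]
# get_files_to_combine(toy_rows, max_span=1, min_snr=0)
# -> ['/a/f1', '/a/f2']   (window MJD 58000.5-58001.2, total SNR 95)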
def clean_archive(inarf, outfn, clean_re=None, *args, **kwargs):
    import psrchive  # Temporarily, because python bindings
                     # are not available on all computers
    if clean_re is None:
        clean_re = config.cfg.clean_strategy
    try:
        outfn = utils.get_outfn(outfn, inarf)
        shutil.copy(inarf.fn, outfn)

        outarf = utils.ArchiveFile(outfn)

        trim_edge_channels(outarf)
        prune_band(outarf)
        remove_bad_channels(outarf)
        remove_bad_subints(outarf)

        matching_cleaners = [
            clnr for clnr in cleaners if clean_re and re.search(clean_re, clnr)
        ]
        if len(matching_cleaners) == 1:
            ar = psrchive.Archive_load(outarf.fn)
            cleaner = eval(matching_cleaners[0])
            utils.print_info(
                "Cleaning using '%s(...)'." % matching_cleaners[0], 2)
            cleaner(ar, *args, **kwargs)
            ar.unload(outfn)
        elif len(matching_cleaners) == 0:
            utils.print_info("No cleaning strategy selected. Skipping...", 2)
        else:
            raise errors.CleanError("Bad cleaner selection. "
                                    "'%s' has %d matches." %
                                    (clean_re, len(matching_cleaners)))
    except:
        # An error prevented cleaning from being successful.
        # Remove the output file because it may confuse the user.
        if os.path.exists(outfn):
            os.remove(outfn)
        raise
    return outarf
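# Hedged usage sketch (names are placeholders): 'cleaners' is assumed to be a
# module-level list of cleaner-function names (e.g. ['deep_clean']). The call
# below would copy 'input.ar' to an output name built by get_outfn, apply the
# edge/band/channel/subint zapping above, and then run the single cleaner
# whose name matches the regular expression.
# outarf = clean_archive(utils.ArchiveFile('input.ar'),
#                        '%(name)s_cleaned.ar', clean_re='deep_clean')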
def main(): rows = get_files(args.psrnames, retry=args.retry) info = {} psrnameset = set([row['sourcename'] for row in rows]) utils.sort_by_keys(rows, args.sortkeys) db = database.Database() with db.transaction() as conn: for row in rows: if row['obstype'] == 'pulsar': calscans = reduce_data.get_potential_polcal_scans( db, row['obs_id']) cancal = bool(calscans) sys.stdout.write(args.fmt.decode('string-escape') % row) if row['obstype'] == 'pulsar': sys.stdout.write("\t%s\n" % cancal) utils.print_info( "Number of potential calibrator scans: %d" % len(calscans), 1) msg = " %s" % "\n ".join([ "Obs ID: %d; File ID: %d; %s" % (calrow['obs_id'], calrow['file_id'], calrow['filename']) for calrow in calscans if type(calrow) is not str ]) utils.print_info(msg, 2) else: sys.stdout.write("\n") if args.retry: for desc in reduce_data.get_all_descendents( row['file_id'], db): if (desc['status'] == 'failed') and (desc['stage'] == 'calibrated'): # File has been calibrated, but it failed. Do not retry. cancal = False utils.print_info( "Calibration of file %d has previously failed. Will _not_ retry." % row['file_id'], 1) if (cancal and (row['status'] != 'failed')) or ( not cancal and (row['status'] == 'calfail')): retry(db, row['file_id']) utils.print_info( "Will retry calibration of file %d" % row['file_id'], 1) if args.retry: for name in psrnameset: try: reduce_data.reattempt_calibration(db, name) calibrate.update_caldb(db, name, force=True) except: pass
def __trim_edge_channels(self, ar):
    """Trim the edge channels of an input file to remove
        band-pass roll-off and the effect of aliasing.
        The file is modified in-place. However, zero-weighting
        is used for trimming, so the process is reversible.

        Inputs:
            ar: The psrchive archive object to clean.

        Outputs:
            None
    """
    nchan = ar.get_nchan()
    bw = float(ar.get_bandwidth())
    num_to_trim = max(self.configs.trimnum,
                      int(self.configs.trimfrac * nchan + 0.5),
                      int(self.configs.trimbw / bw * nchan + 0.5))
    if num_to_trim > 0:
        utils.print_info('Trimming %d channels from each band-edge.' %
                         num_to_trim, 2)
        for ichan in range(num_to_trim):
            clean_utils.zero_weight_chan(ar, ichan)              # trim at beginning
            clean_utils.zero_weight_chan(ar, nchan - ichan - 1)  # trim at end
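# Illustrative numbers (assumed config values, not defaults from the pipeline):
# for a 256-channel, 200-MHz sub-band with trimnum=2, trimfrac=0.05 and
# trimbw=20.0, the three criteria give 2, 13 and 26 channels respectively,
# so 26 channels would be zero-weighted at each band edge.
nchan, bw = 256, 200.0
trimnum, trimfrac, trimbw = 2, 0.05, 20.0   # hypothetical config values
num_to_trim = max(trimnum,
                  int(trimfrac * nchan + 0.5),
                  int(trimbw / bw * nchan + 0.5))
print(num_to_trim)   # -> 26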
def group_subband_dirs(subdirs, maxspan=None, maxgap=None,
                       tossfrac=None, filetype='subint'):
    """Based on file names group sub-ints from different sub-bands.
        Each subband is assumed to be in a separate directory.

        Inputs:
            subdirs: List of sub-band directories
            maxspan: Maximum span, in seconds, between first and
                last sub-int in a combined file.
            maxgap: Maximum gap, in seconds, permitted before
                starting a new output file.
            tossfrac: Fraction of sub-ints required for a sub-band
                to be combined. If a sub-band has fewer than
                tossfrac*N_subint sub-ints it will be excluded.
            filetype: Type of files being grouped. Can be 'subint',
                or 'single'. (Default: 'subint')

        Outputs:
            usedirs: List of directories to use when combining.
                (NOTE: This may be different than the input 'subdirs'
                    because some directories may have too few subints
                    to be worth combining. This depends on the input
                    value of 'tossfrac'.)
            groups: List of groups of files to be combined.
                (NOTE: These are the file name only (i.e. no path)
                    Each file listed appears in each of 'usedirs'.)
    """
    if maxspan is None:
        maxspan = config.cfg.combine_maxspan
    if maxgap is None:
        maxgap = config.cfg.combine_maxgap
    if tossfrac is None:
        tossfrac = 1 - config.cfg.missing_subint_tolerance

    if filetype not in FILETYPE_SPECIFICS:
        raise errors.InputError("File type (%s) is not recognized. "
                                "Possible values are: '%s'" %
                                (filetype,
                                 "', '".join(FILETYPE_SPECIFICS.keys())))
    else:
        globpat, get_start = FILETYPE_SPECIFICS[filetype]

    # Ensure paths are absolute
    subdirs = [os.path.abspath(path) for path in subdirs]

    utils.print_debug("Grouping subints from %d sub-band directories" %
                      len(subdirs), 'combine')

    nindirs = len(subdirs)
    nsubbands = len(subdirs)
    nperdir = collections.Counter()
    noccurs = collections.Counter()
    nintotal = 0
    for subdir in subdirs:
        fns = glob.glob(os.path.join(subdir, globpat))
        nn = len(fns)
        utils.print_debug("Found %d sub-int files in %s" %
                          (nn, subdir), 'combine')
        nintotal += nn
        nperdir[subdir] = nn
        noccurs.update([os.path.basename(fn) for fn in fns])
    nsubints = len(noccurs)

    # Remove sub-bands that have too few subints
    thresh = tossfrac * nsubints
    for ii in xrange(len(subdirs) - 1, -1, -1):
        subdir = subdirs[ii]
        if nperdir[subdir] < thresh:
            utils.print_info("Ignoring sub-ints from %s. "
                             "It has too few sub-ints (%d < %d; tossfrac: %f)" %
                             (subdir, nperdir[subdir], thresh, tossfrac), 2)
            subdirs.pop(ii)
            del nperdir[subdir]

            fns = glob.glob(os.path.join(subdir, globpat))
            noccurs.subtract([os.path.basename(fn) for fn in fns])
            nsubbands -= 1

    # Remove subints that are no longer included in any subbands
    to_del = []
    for fn in noccurs:
        if not noccurs[fn]:
            to_del.append(fn)
    for fn in to_del:
        del noccurs[fn]

    # Now combine subints
    lastsubint = datetime.datetime.min
    filestart = datetime.datetime.min
    groups = []
    if nsubbands:
        for subint in sorted(noccurs):
            if noccurs[subint] < nsubbands:
                utils.print_info("Ignoring sub-int (%s). It doesn't appear "
                                 "in all subbands (only %d of %d)" %
                                 (subint, noccurs[subint], nsubbands), 2)
                continue
            start = get_start(os.path.join(subdirs[0], subint))
            if ((start - filestart).total_seconds() > maxspan) or \
                    ((start - lastsubint).total_seconds() > maxgap):
                filestart = start
                utils.print_debug("Starting a new file at %s" %
                                  filestart, 'combine')
                # Start a new file
                groups.append([])
            groups[-1].append(subint)
            lastsubint = start
    nused = sum([len(grp) for grp in groups])

    utils.print_info("Grouped %d files from %d directories into %d groups.\n"
                     "(Threw out %d directories and %d files)" %
                     (nintotal, nindirs, len(groups), nindirs - len(subdirs),
                      nintotal - nused), 2)
    return subdirs, groups
def run(self, ar): utils.print_info( "Cleaning '%s' with %s" % (ar.get_filename(), self.name), 1) utils.print_debug("Cleaning parameters: %s" % self.get_config_string(), 'clean') self._clean(ar)
def main(): psrname = utils.get_prefname(args.psrname) if args.nchan == 1: ext = '.FTp' scrunchargs = ['-F'] elif args.nchan > 1: ext = '.Tp.F%d' % args.nchan scrunchargs = ['--setnchn', '%d' % args.nchan] else: raise ValueError("Cannot scrunch using negative number of " "channels (nchan=%d)" % args.nchan) #psrdirs = dict([(utils.get_prefname(os.path.basename(dd)), # os.path.basename(dd)) # for dd in glob.glob(os.path.join(PARFILE_DIR, '*')) # if os.path.isdir(dd)]) #if psrname in psrdirs: # legacydir = os.path.join('/homes/plazarus/research/epta-legacy/', # psrdirs[psrname]) #else: # legacydir = None # Copy EPTA legacy TOAs #if legacydir and not os.path.exists("epta-legacy"): # os.mkdir("epta-legacy") # shutil.copytree(os.path.join(legacydir, "tims"), "epta-legacy/tims") # shutil.copy(os.path.join(legacydir, # "%s_t2noise.model" % psrdirs[psrname]), # "epta-legacy") # Find parfile if args.parfile is not None: if not os.path.exists(args.parfile): raise errors.InputError("Parfile specified (%s) doesn't exist!" % args.parfile) inparfn = args.parfile else: # Create parfile #inparfn = os.path.join('/homes/plazarus/research/epta-legacy/', # psrdirs[psrname], "%s.par" % psrdirs[psrname]) inparfn = reduce_data.PARFILES[psrname] #intimfn = os.path.join('/homes/plazarus/research/epta-legacy/', # psrdirs[psrname], "%s_all.tim" % psrdirs[psrname]) outparfn = "%s.T2.par" % psrname with open(inparfn, 'r') as inff, open(outparfn, 'w') as outff: for line in inff: # Don't copy over JUMPs or EFACs to 'outff' if not line.startswith("JUMP") and \ not 'EFAC' in line: outff.write(line) outff.write("\n".join(EXTRA_PARFILE_LINES)) template_dir = os.path.join(BASE_TEMPLATE_DIR, psrname) for stage in STAGES: if stage == "current": continue for rcvr in RCVRS: template_name = "%s_%s_%s.std" % (psrname, rcvr, stage) # First, check if templates exists if not os.path.isfile(os.path.join(template_dir, template_name)): # Make template utils.print_info("No template (%s) found!" 
% template_name, 1) try: os.makedirs(template_dir) except: pass try: print psrname, stage, rcvr stdfn = make_template.make_template( template_dir, psrname, stage, rcvr) utils.print_info("Made template: %s" % stdfn, 1) except errors.TemplateGenerationError: pass timfns = [] for stage in STAGES: # List files to reduce rows = list_files.get_files([psrname], stage) print len(rows) fns = {} # Initialize list of file names for each receiver for rcvr in RCVRS: fns[rcvr] = [] for row in rows: if row['stage'] not in ('cleaned', 'calibrated'): continue fn = os.path.join(row['filepath'], row['filename']) fns[row['rcvr']].append(fn) stagetimfn = "%s_%s.tim" % (psrname, stage) print "Opening %s" % stagetimfn stagetimff = open(stagetimfn, 'w') # Create file listings and generate TOAs for rcvr in RCVRS: print rcvr, len(fns[rcvr]) if not fns[rcvr]: # No files continue # Check for existing scrunched files toscrunch = [] scrunchedfns = [] scrunchdir = os.path.join("scrunched", rcvr) for fn in fns[rcvr]: scrunchfn = os.path.join(scrunchdir, os.path.basename(fn) + ext) scrunchedfns.append(scrunchfn) if not os.path.exists(scrunchfn): toscrunch.append(fn) # Scrunch files try: os.makedirs(scrunchdir) except: pass print "Working on %s %s" % (rcvr, stage) for fn in utils.show_progress(toscrunch, width=50): # Create a copy of the file with the 'eff_psrix' site cmd = ['psredit', '-c', 'site=eff_psrix', '-O', scrunchdir, fn] cmd.extend(['-e', fn.split('.')[-1] + ext]) utils.execute(cmd) arfn = os.path.join(scrunchdir, os.path.basename(fn + ext)) parfn = utils.get_norm_parfile(arfn) # Re-install ephemeris cmd = ['pam', '-Tp', '-E', parfn, '-m', arfn] + scrunchargs utils.execute(cmd) toas = [] mjds = [] for row in rows: if row['rcvr'] != rcvr: continue if row['stage'] not in ('cleaned', 'calibrated'): continue template_name = "%s_%s_%s.std" % (psrname, rcvr, row['stage']) template = os.path.join(template_dir, template_name) # Generate TOAs fn = os.path.join(scrunchdir, row['filename']) + ext print fn stdout, stderr = utils.execute([ "pat", "-T", "-A", "FDM", "-f", "tempo2", "-C", "rcvr chan", "-d", "-s", template, fn ]) # Parse TOAs toalines = stdout.split('\n') for line in toalines: toainfo = readers.tempo2_reader(line) if toainfo is not None: # Formatter expects 'file' field to be called 'rawfile' toainfo['rawfile'] = toainfo['file'] toainfo['telescope_code'] = toainfo['telescope'] toainfo['type'] = stage toainfo['rcvr'] = rcvr toainfo['file_id'] = row['file_id'] toainfo['obs_id'] = row['obs_id'] toainfo['shortstage'] = row['stage'][:2].upper() if row['stage'] == 'cleaned': toainfo['grp'] = "%s_clean" % rcvr else: toainfo['grp'] = "%s_cal" % rcvr toainfo['chan'] = toainfo['extras']['chan'] toas.append(toainfo) mjds.append(toainfo['imjd']) # Sort TOAs utils.sort_by_keys(toas, ['fmjd', 'imjd']) # Format timfile sysflag = 'EFF.AS.%(rcvr)s.%(shortstage)s' timlines = formatters.tempo2_formatter(toas, flags=[ ('rcvr', '%(rcvr)s'), ('type', '%(type)s'), ('grp', '%(grp)s'), ('sys', sysflag), ('obsid', '%(obs_id)d'), ('fileid', '%(file_id)d'), ('chan', '%(chan)s') ]) mjds.sort() #offsetmjds = sorted(TIME_OFFSETS.keys()) #inds = np.searchsorted(mjds, offsetmjds)+1 # Insert extra lines from back of list #for ind, key in reversed(zip(inds, offsetmjds)): # timlines[ind:ind] = ["\n"+TIME_OFFSETS[key]+"\n"] # Write out timfile timfn = "%s_%s_%s.tim" % (psrname, rcvr, stage) with open(timfn, 'w') as ff: for line in timlines: ff.write(line + "\n") utils.print_info("Wrote out timfile: %s" % timfn) timfns.append(timfn) 
stagetimff.write("INCLUDE %s\n" % timfn) stagetimff.close() #outtimfn = os.path.join("epta-legacy", os.path.basename(intimfn)) #with open(intimfn, 'r') as inff, open(outtimfn, 'w') as outff: # for line in inff: # outff.write(line) # for rcvr in RCVRS: # timfn = "%s_%s_cleaned.tim" % (psrname, rcvr) # if os.path.exists(timfn): # outff.write("INCLUDE ../%s\n" % timfn) # Count TOAs #toas = load_toa.parse_timfile(outtimfn, determine_obssystem=False) systems = {} #for toa in toas: # if toa['is_bad']: # continue # if not 'sys' in toa['extras']: # print toa # else: # nn = systems.get(toa['extras']['sys'], 0) # systems[toa['extras']['sys']] = nn+1 outparfn = "%s.T2.par" % psrname #outparfn2 = os.path.join("epta-legacy", os.path.basename(inparfn)) with open(inparfn, 'r') as inff, open(outparfn, 'w') as outff: #, \ #open(outparfn2, 'w') as outff2: for line in inff: # Don't copy over JUMPs or EFACs to 'outff' # Copy JUMPs and EFACs to 'outff2' and fit #if line.startswith("JUMP"): # if "-sys" in line: # obssys = line.split()[2] # if systems.get(obssys, 0): # # Observing system has TOAs # # Replace all system jumps by 0 and set the fit flag # outff2.write(" ".join(line.split()[:3])+" 0 1\n") # else: # outff2.write(line) #elif line.startswith("T2EFAC"): # outff2.write(line) #elif line.startswith("NITS"): # pass #else: outff.write(line) # Remove fit-flags for 'outff2' #outff2.write(" ".join(line.split()[:2])+'\n') outff.write("\n".join(EXTRA_PARFILE_LINES)) #outff2.write("\n".join(["JUMP -sys EFF.AS.%s.CL 0 1" % rcvr for rcvr in RCVRS])) #outff2.write("\nNITS 3\n") # Create a master timfile master_timfn = "%s_all.tim" % psrname with open(master_timfn, 'w') as ff: for timfn in timfns: ff.write("INCLUDE %s\n" % timfn) utils.print_info("Wrote out master timfile: %s" % master_timfn)
def update_caldb(db, sourcename, force=False):
    """Check for new calibrator scans. If found, update the calibrator database.

        Inputs:
            db: A Database object.
            sourcename: The name of the source to match.
                (NOTE: '_R' will be removed from the sourcename, if present)
            force: Forcefully update the caldb

        Outputs:
            caldb: The path to the updated caldb.
    """
    name = utils.get_prefname(sourcename)
    if name.endswith('_R'):
        name = name[:-2]

    # Get the caldb
    caldb = get_caldb(db, name)
    if caldb is None:
        lastupdated = datetime.datetime.min
        outdir = os.path.join(config.output_location, 'caldbs')
        try:
            os.makedirs(outdir)
        except OSError:
            # Directory already exists
            pass
        outfn = '%s.caldb.txt' % name.upper()
        outpath = os.path.join(outdir, outfn)
        insert_new = True
        values = {'sourcename': name,
                  'caldbpath': outdir,
                  'caldbname': outfn}
    else:
        lastupdated = caldb['last_modified']
        outpath = os.path.join(caldb['caldbpath'], caldb['caldbname'])
        insert_new = False
        values = {}

    with db.transaction() as conn:
        if not insert_new:
            # Mark update of caldb as in-progress
            update = db.caldbs.update().\
                        values(status='updating',
                               last_modified=datetime.datetime.now()).\
                        where(db.caldbs.c.caldb_id == caldb['caldb_id'])
            conn.execute(update)

        select = db.select([db.files],
                           from_obj=[db.files.
                                     outerjoin(db.obs,
                                               onclause=(db.files.c.obs_id ==
                                                         db.obs.c.obs_id))]).\
                    where((db.files.c.status.in_(['new', 'done'])) &
                          (db.files.c.stage == 'calibrated') &
                          (db.obs.c.obstype == 'cal') &
                          (db.obs.c.sourcename == ('%s_R' % name)))
        results = conn.execute(select)
        rows = results.fetchall()
        results.close()

        numnew = 0
        for row in rows:
            if row['added'] > lastupdated:
                numnew += 1

        utils.print_info("Found %d suitable calibrators for %s. "
                         "%d are new." % (len(rows), name, numnew), 2)

        values['numentries'] = len(rows)

        try:
            if numnew or force:
                # Create an updated version of the calibrator database
                basecaldir = os.path.join(config.output_location,
                                          name.upper() + "_R")
                utils.execute(['pac', '-w', '-u', '.pcal.T', '-k', outpath],
                              dir=basecaldir)
        except:
            # raise
            values['status'] = 'failed'
            if insert_new:
                action = db.caldbs.insert()
            else:
                action = db.caldbs.update().\
                            values(note='%d new entries added' % numnew,
                                   last_modified=datetime.datetime.now()).\
                            where(db.caldbs.c.caldb_id == caldb['caldb_id'])
            conn.execute(action, values)
        else:
            if insert_new:
                action = db.caldbs.insert()
            else:
                action = db.caldbs.update().\
                            values(status='ready',
                                   note='%d new entries added' % numnew,
                                   last_modified=datetime.datetime.now()).\
                            where(db.caldbs.c.caldb_id == caldb['caldb_id'])
            conn.execute(action, values)
    return outpath
def deep_clean(toclean, chanthresh=None, subintthresh=None, binthresh=None):
    import psrchive  # Temporarily, because python bindings
                     # are not available on all computers

    if chanthresh is None:
        chanthresh = config.cfg.clean_chanthresh
    if subintthresh is None:
        subintthresh = config.cfg.clean_subintthresh
    if binthresh is None:
        binthresh = config.cfg.clean_binthresh

    ar = toclean.clone()
    ar.pscrunch()
    ar.remove_baseline()
    ar.dedisperse()

    # Remove profile
    data = ar.get_data().squeeze()
    template = np.apply_over_axes(np.sum, data, (0, 1)).squeeze()
    clean_utils.remove_profile_inplace(ar, template, None)
    ar.dededisperse()

    # First clean channels
    chandata = clean_utils.get_chans(ar, remove_prof=True)
    chanweights = clean_utils.get_chan_weights(ar).astype(bool)
    chanmeans = clean_utils.scale_chans(chandata.mean(axis=1),
                                        chanweights=chanweights)
    chanmeans /= clean_utils.get_robust_std(chanmeans, chanweights)
    chanstds = clean_utils.scale_chans(chandata.std(axis=1),
                                       chanweights=chanweights)
    chanstds /= clean_utils.get_robust_std(chanstds, chanweights)

    badchans = np.concatenate((np.argwhere(np.abs(chanmeans) >= chanthresh),
                               np.argwhere(np.abs(chanstds) >= chanthresh)))
    badchans = np.unique(badchans)
    utils.print_info(
        "Number of channels to be de-weighted: %d" % len(badchans), 2)
    for ichan in badchans:
        utils.print_info("De-weighting chan# %d" % ichan, 3)
        clean_utils.zero_weight_chan(ar, ichan)
        clean_utils.zero_weight_chan(toclean, ichan)

    # Next clean subints
    subintdata = clean_utils.get_subints(ar, remove_prof=True)
    subintweights = clean_utils.get_subint_weights(ar).astype(bool)
    subintmeans = clean_utils.scale_subints(subintdata.mean(axis=1),
                                            subintweights=subintweights)
    subintmeans /= clean_utils.get_robust_std(subintmeans, subintweights)
    subintstds = clean_utils.scale_subints(subintdata.std(axis=1),
                                           subintweights=subintweights)
    subintstds /= clean_utils.get_robust_std(subintstds, subintweights)

    badsubints = np.concatenate((np.argwhere(np.abs(subintmeans) >= subintthresh),
                                 np.argwhere(np.abs(subintstds) >= subintthresh)))

    if config.debug.CLEAN:
        plt.subplots_adjust(hspace=0.4)
        chanax = plt.subplot(4, 1, 1)
        plt.plot(np.arange(len(chanmeans)), chanmeans, 'k-')
        plt.axhline(chanthresh, c='k', ls='--')
        plt.axhline(-chanthresh, c='k', ls='--')
        plt.xlabel('Channel Number', size='x-small')
        plt.ylabel('Average', size='x-small')

        plt.subplot(4, 1, 2, sharex=chanax)
        plt.plot(np.arange(len(chanstds)), chanstds, 'k-')
        plt.axhline(chanthresh, c='k', ls='--')
        plt.axhline(-chanthresh, c='k', ls='--')
        plt.xlabel('Channel Number', size='x-small')
        plt.ylabel('Standard Deviation', size='x-small')

        subintax = plt.subplot(4, 1, 3)
        plt.plot(np.arange(len(subintmeans)), subintmeans, 'k-')
        plt.axhline(subintthresh, c='k', ls='--')
        plt.axhline(-subintthresh, c='k', ls='--')
        plt.xlabel('Sub-int Number', size='x-small')
        plt.ylabel('Average', size='x-small')

        plt.subplot(4, 1, 4, sharex=subintax)
        plt.plot(np.arange(len(subintstds)), subintstds, 'k-')
        plt.axhline(subintthresh, c='k', ls='--')
        plt.axhline(-subintthresh, c='k', ls='--')
        plt.xlabel('Sub-int Number', size='x-small')
        plt.ylabel('Standard Deviation', size='x-small')
        plt.show()

    badsubints = np.unique(badsubints)
    utils.print_info(
        "Number of sub-ints to be de-weighted: %d" % len(badsubints), 2)
    for isub in badsubints:
        utils.print_info("De-weighting subint# %d" % isub, 3)
        clean_utils.zero_weight_subint(ar, isub)
        clean_utils.zero_weight_subint(toclean, isub)

    # Re-dedisperse the data
    ar.dedisperse()

    # Now replace hot bins
    utils.print_info("Will find and clean 'hot' bins", 2)
    clean_utils.clean_hot_bins(toclean, thresh=binthresh)
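# Minimal sketch of the channel-thresholding step above (toy numbers, and an
# assumed robust-standard-deviation value standing in for
# clean_utils.get_robust_std): scaled channel means are divided by the robust
# standard deviation, and any channel whose |z| is at or above chanthresh is
# de-weighted.
import numpy as np
chanmeans = np.array([0.1, 0.2, 5.0, 0.15, -4.2])   # toy scaled channel means
robust_std = 0.2                                    # toy robust estimate
chanthresh = 5.0
zscores = chanmeans / robust_std
print(np.argwhere(np.abs(zscores) >= chanthresh).ravel())   # -> [2 4]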
def run(self, ar):
    utils.print_info(
        'Cleaning %s with %s' % (ar.get_filename(), self.name), 1)
    self._clean(ar)
def main(): db = database.Database() obs_id = args.obs_id obsinfo = get_obsinfo(db, obs_id) datestr = utils.mjd_to_datetime(obsinfo['start_mjd']).strftime("%Y%m%d") subdirs = [ datetime.datetime.now().strftime("%Y%m%dT%H:%M:%S"), datestr, obsinfo['sourcename'] ] subdirs.reverse() backupdir = os.path.join(config.output_location, "removed", *subdirs) print "Will remove database entries for obs ID %d" % obs_id print "Back-ups of existing files will be copied to %s" % backupdir log_ids, logfns = get_loginfo(db, obs_id) assert len(log_ids) == len(logfns) print "Will also remove %d logs" % len(log_ids) tmp = "\n".join(["Log ID: %d; %s" % xx for xx in zip(log_ids, logfns)]) utils.print_info(tmp, 1) rows = get_fileinfo(db, obs_id) file_ids = [ row['file_id'] for row in rows if not ((row['stage'] == 'grouped') or ( (row['stage'] == 'combined') and (not row['is_deleted']))) ] file_ids_left = [ row['file_id'] for row in rows if row['file_id'] not in file_ids ] fns = [ os.path.join(row['filepath'], row['filename']) for row in rows if row['file_id'] in file_ids ] print "Will also remove %d files" % len(rows) tmp = "\n".join(["File ID: %d; %s" % xx for xx in zip(file_ids, fns)]) utils.print_info(tmp, 1) diag_ids, diagfns = get_diaginfo(db, file_ids) assert len(diag_ids) == len(diagfns) print "Will also remove %d diagnostics" % len(diag_ids) tmp = "\n".join( ["Diagnostic ID: %d; %s" % xx for xx in zip(diag_ids, diagfns)]) utils.print_info(tmp, 1) qctrl_ids = get_qcinfo(db, file_ids) print "Will also remove %d quality control entries" % len(qctrl_ids) tmp = "\n".join(["QC ID: %d" % xx for xx in qctrl_ids]) utils.print_info(tmp, 1) reatt_ids = get_reattinfo(db, file_ids) print "Will also remove %d re-attempt entries" % len(reatt_ids) tmp = "\n".join(["Re-attempt ID: %d" % xx for xx in reatt_ids]) utils.print_info(tmp, 1) mysqldumpstr = dump_db_entries(db, obs_id, log_ids, file_ids, diag_ids) utils.print_info("MySQL dump:\n%s" % mysqldumpstr, 2) if not args.dryrun: try: # Make back-up directory oldumask = os.umask(0007) os.makedirs(backupdir) os.umask(oldumask) # Write mysql dump with open(os.path.join(backupdir, "db_entries.sql"), 'w') as ff: ff.write(mysqldumpstr) # Move files for src in fns + logfns + diagfns: fn = os.path.basename(src) dest = os.path.join(backupdir, fn) if os.path.isfile(src): # Make sure file exists (it may have already been deleted) shutil.move(src, dest) # Remove entries from the database with db.transaction() as conn: # Remove diagnostic entries delete = db.diagnostics.delete().\ where(db.diagnostics.c.diagnostic_id.in_(diag_ids)) results = conn.execute(delete) results.close() # Remove any quality control entries in the database delete = db.qctrl.delete().\ where(db.qctrl.c.qctrl_id.in_(qctrl_ids)) results = conn.execute(delete) results.close() # Remove obs' 'current_file_id' entry update = db.obs.update().\ where(db.obs.c.obs_id == row['obs_id']).\ values(current_file_id=None) results = conn.execute(update) results.close() # Remove file entries # (newest first because of foreign key constraints - parent_file_id column) for row in rows: if (row['stage'] == 'grouped') or \ ((row['stage'] == 'combined') and (not row['is_deleted'])): # Leave grouped files and undeleted combined files pass else: delete = db.files.delete().\ where(db.files.c.file_id == row['file_id']) results = conn.execute(delete) results.close() # # Do not delete log entries from the database even though log file was moved # # Update newest file left to have status new update = db.files.update().\ 
where(db.files.c.file_id == max(file_ids_left)).\ values(status='new', note='Data are being reprocessed.', last_modified=datetime.datetime.now()) conn.execute(update) except: print "Error encountered! Will attempt to un-move files." # Try to unmove files for src in fns + logfns + diagfns: fn = os.path.basename(src) dest = os.path.join(backupdir, fn) if os.path.isfile(dest) and not os.path.isfile(src): shutil.move(dest, src) if os.path.isdir(backupdir): try: os.remove(os.path.join(backupdir, "db_entries.sql")) os.rmdir(backupdir) except: print "Could not remove back-up dir %s" % backupdir raise else: print "Successfully reseted obs ID: %d" % obs_id
def main():
    if args.dir_id is not None:
        # Get directory path from database
        dir_toremove = get_dir_from_id(args.dir_id)
        dir_id = args.dir_id
    else:
        dir_toremove = os.path.join(config.base_rawdata_dir, args.dir)
        dir_id = get_id_from_dir(args.dir)

    if not dir_toremove.startswith(config.base_rawdata_dir):
        raise ValueError("Directory to remove (%s) is not in the raw "
                         "data directory (%s)" %
                         (dir_toremove, config.base_rawdata_dir))

    subdirs = [datetime.datetime.now().strftime("%Y%m%dT%H:%M:%S")]
    tmp = dir_toremove
    while tmp and (os.path.abspath(config.base_rawdata_dir) !=
                   os.path.abspath(tmp)):
        tmp, tmp2 = os.path.split(tmp)
        subdirs.append(tmp2)
    subdirs.reverse()
    backupdir = os.path.join(config.output_location, "removed", *subdirs)

    print "Will remove database entries for data in %s" % dir_toremove
    print "Back-ups of existing files will be copied to %s" % backupdir

    db = database.Database()
    obs_ids = get_obsinfo(db, dir_id)
    print "Will also remove %d observations" % len(obs_ids)
    tmp = ", ".join(["%d" % xx for xx in obs_ids])
    utils.print_info("Obs IDs: %s" % tmp, 1)

    log_ids, logfns = get_loginfo(db, obs_ids)
    assert len(log_ids) == len(logfns)
    print "Will also remove %d logs" % len(log_ids)
    tmp = "\n".join(["Log ID: %d; %s" % xx for xx in zip(log_ids, logfns)])
    utils.print_info(tmp, 1)

    file_ids, fns = get_fileinfo(db, obs_ids)
    assert len(file_ids) == len(fns)
    print "Will also remove %d files" % len(file_ids)
    tmp = "\n".join(["File ID: %d; %s" % xx for xx in zip(file_ids, fns)])
    utils.print_info(tmp, 1)

    diag_ids, diagfns = get_diaginfo(db, file_ids)
    assert len(diag_ids) == len(diagfns)
    print "Will also remove %d diagnostics" % len(diag_ids)
    tmp = "\n".join(["Diagnostic ID: %d; %s" % xx
                     for xx in zip(diag_ids, diagfns)])
    utils.print_info(tmp, 1)

    mysqldumpstr = dump_db_entries(db, dir_id, obs_ids, log_ids,
                                   file_ids, diag_ids)
    utils.print_info("MySQL dump:\n%s" % mysqldumpstr, 2)

    if not args.dryrun:
        try:
            # Make back-up directory
            os.makedirs(backupdir)
            # Write mysql dump
            with open(os.path.join(backupdir, "db_entries.sql"), 'w') as ff:
                ff.write(mysqldumpstr)
            # Move files
            for src in fns + logfns + diagfns:
                fn = os.path.basename(src)
                dest = os.path.join(backupdir, fn)
                if os.path.isfile(src):
                    # Make sure file exists (it may have already been deleted)
                    shutil.move(src, dest)
            # Remove entries from the database
            with db.transaction() as conn:
                # Remove diagnostic entries
                delete = db.diagnostics.delete().\
                            where(db.diagnostics.c.diagnostic_id.in_(diag_ids))
                results = conn.execute(delete)
                results.close()
                # Remove file entries
                # (newest first because of foreign key constraints
                #  - parent_file_id column)
                for file_id in reversed(sorted(file_ids)):
                    delete = db.files.delete().\
                                where(db.files.c.file_id == file_id)
                    results = conn.execute(delete)
                    results.close()
                # logs
                delete = db.logs.delete().\
                            where(db.logs.c.log_id.in_(log_ids))
                results = conn.execute(delete)
                results.close()
                # obs
                delete = db.obs.delete().\
                            where(db.obs.c.obs_id.in_(obs_ids))
                results = conn.execute(delete)
                results.close()
                # directory
                delete = db.directories.delete().\
                            where(db.directories.c.dir_id == dir_id)
                results = conn.execute(delete)
                results.close()
        except:
            print "Error encountered! Will attempt to un-move files."
            # Try to unmove files
            for src in fns + logfns + diagfns:
                fn = os.path.basename(src)
                dest = os.path.join(backupdir, fn)
                if os.path.isfile(dest) and not os.path.isfile(src):
                    shutil.move(dest, src)
            if os.path.isdir(backupdir):
                try:
                    os.rmdir(backupdir)
                except:
                    print "Could not remove back-up dir %s" % backupdir
            raise
        else:
            print "Successfully scrubbed %s (ID: %d)" % (dir_toremove, dir_id)
def combine_subints(subdirs, subints, parfn=None, outdir=None):
    """Combine sub-ints from various freq sub-band directories.
        The input lists are as created by 'group_subband_dirs'
        or read-in by 'read_listing'.

        Inputs:
            subdirs: List of sub-band directories containing
                sub-ints to combine
            subints: List of subint files to be combined.
                (NOTE: These are the file name only (i.e. no path)
                    Each file listed should appear in each of the subdirs.)
            parfn: New ephemeris to install when combining subints.
                (Default: Use ephemeris in archive file's header)
            outdir: Directory to output combined file.
                (Default: Current working directory)

        Output:
            outfn: The name of the combined file.
    """
    if outdir is None:
        outdir = os.getcwd()
    subints = sorted(subints)
    tmpdir = tempfile.mkdtemp(suffix="_combine", dir=config.tmp_directory)
    devnull = open(os.devnull)
    try:
        cmbsubints = []

        # Try to normalise the archive's parfile
        try:
            if parfn is None:
                arfn = os.path.join(subdirs[0], subints[0])
                normparfn = utils.get_norm_parfile(arfn)
            else:
                normparfn = utils.normalise_parfile(parfn)
        except errors.InputError:
            # No parfile present
            parargs = []
        else:
            parargs = ['-E', normparfn]

        utils.print_info("Adding freq sub-bands for each sub-int...", 2)
        for ii, subint in enumerate(utils.show_progress(subints, width=50)):
            to_combine = [os.path.join(path, subint) for path in subdirs]
            outfn = os.path.join(tmpdir, "combined_%s" % subint)
            cmbsubints.append(outfn)
            utils.execute(['psradd', '-q', '-R', '-o', outfn] +
                          parargs + to_combine, stderr=devnull)
        arf = utils.ArchiveFile(
            os.path.join(tmpdir, "combined_%s" % subints[0]))
        outfn = os.path.join(
            outdir, "%s_%s_%s_%05d_%dsubints.cmb" %
            (arf['name'], arf['band'], arf['yyyymmdd'],
             arf['secs'], len(subints)))
        utils.print_info("Combining %d sub-ints..." % len(cmbsubints), 1)
        utils.execute(['psradd', '-q', '-o', outfn] + cmbsubints,
                      stderr=devnull)
    except:
        raise  # Re-raise the exception
    finally:
        if debug.is_on('reduce'):
            warnings.warn("Not cleaning up temporary directory (%s)" % tmpdir,
                          errors.CoastGuardWarning)
        else:
            utils.print_info("Removing temporary directory (%s)" % tmpdir, 2)
            shutil.rmtree(tmpdir)
    return outfn
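# Hedged usage sketch (all paths are placeholders): group the sub-band
# directories of one observation first, then combine each group of sub-ints
# into a single archive. glob is assumed to be imported at module level.
# subdirs, groups = group_subband_dirs(glob.glob('/data/obs1/*'),
#                                      maxspan=1800, maxgap=300)
# for subints in groups:
#     outfn = combine_subints(subdirs, subints, outdir='/data/obs1/combined')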
def main(): if args.outfn is not None: outfile = open(args.outfn, 'w') else: outfile = sys.stdout if args.interp_method not in ("linear", "nearest", "quadratic", "cubic", "median"): # Assume a file of paramters is provided fitfn = args.interp_method if os.path.isfile(fitfn): args.interp_method = "file" fitparams = np.loadtxt(fitfn, unpack=False) get_correction = maser_gps_fit_factory(fitparams) else: raise ValueError("Interpolation method (%s) is not recognized " "nor is it a file of parameters!" % args.interp_method) if args.include_clock_offsets: clock_offsets = CLOCK_OFFSETS clock_mjds = [float(clk[0]) for clk in CLOCK_OFFSETS if np.isfinite(clk[0])] + \ [float(clk[1]) for clk in CLOCK_OFFSETS if np.isfinite(clk[1])] clock_mjds = sorted(set(clock_mjds)) else: clock_offsets = [] clock_mjds = [] end_mjd = args.end_mjd if end_mjd is None: end_mjd = rsutils.mjdnow() mjds = get_mjds(args.start_mjd, end_mjd, args.interval, args.num_per_day, additional=clock_mjds) curr = None if args.include_clock_offsets: outfile.write("# UTC(EFFIX) UTC(GPS)\n") outfile.write("# Effelsberg Asterix/PSRix clock correction file\n") else: outfile.write("# UTC(EFF) UTC(GPS)\n") outfile.write("# Effelsberg clock correction file\n") outfile.write("# Generated on %s with %s (by P. Lazarus) \n" % (datetime.datetime.now().strftime("%B %d, %Y"), __file__)) outfile.write("# The following clock offsets are included:\n") if clock_offsets: for start_mjd, end_mjd, clkoff in clock_offsets: outfile.write("# MJD: %5s to %5s; offset=%g s\n" % (start_mjd, end_mjd, clkoff)) else: outfile.write("# None\n") outfile.write("#\n") if args.include: with open(args.include, 'r') as inclff: for line in inclff: line = line.strip() if not line or line.startswith('#'): continue mjd = float(line.split()[0]) if mjd < mjds[0]: outfile.write(line + "\n") # Include clock offsets clkoff = 0 for mjd in mjds: imjd = int(mjd) try: if args.interp_method == "file": # File containing parameters # get_correction is defined above when the fit-file is read pass else: if curr != imjd: utils.print_info( "Getting maser corrections for MJD %05d" % imjd, 1) # Get corrections data = get_maser_data(imjd) if args.interp_method == "median": get_correction = lambda mjd: np.median(data[:, 1]) elif args.interp_method in ("linear", "nearest", "quadratic", "cubic"): get_correction = interpolate.interp1d( data[:, 0], data[:, 1], kind=args.interp_method) curr = imjd correction = get_correction(mjd) if correction > 0.5: correction -= 1 elif correction < -0.5: correction += 1 correction = -correction if float(mjd) in clock_mjds: if clkoff: outfile.write("%.6f\t%.12e # Clock offset: %g s\n" % (mjd, correction + clkoff, clkoff)) else: outfile.write("%.6f\t%.12e\n" % (mjd, correction)) for start_mjd, end_mjd, offset in clock_offsets: if start_mjd <= mjd < end_mjd: clkoff = offset break if clkoff: outfile.write("%.6f\t%.12e # Clock offset: %g s\n" % (mjd, correction + clkoff, clkoff)) else: outfile.write("%.6f\t%.12e\n" % (mjd, correction)) except NoMaserFileFound: outfile.write("# Cannot determine clock correction for MJD %g: " \ "No maser file found for MJD %d\n" % (mjd, imjd)) except NoMaserData: outfile.write("# Cannot determine clock correction for MJD %g: " \ "No maser data parsed from file for MJD %d\n" % (mjd, imjd)) except ValueError, exc: outfile.write("# Cannot determine clock correction for MJD %g: " \ "%s\n" % (mjd, str(exc)))
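# Sketch of the wrap handling in the loop above (toy values): the raw
# maser-minus-GPS reading is presumably only meaningful modulo 1 s, so values
# near +/-1 s are folded back into the (-0.5, 0.5] s range before the sign is
# flipped to give the clock correction that is written out.
for correction in (0.9999982, 3.1e-6, -0.999997):
    if correction > 0.5:
        correction -= 1
    elif correction < -0.5:
        correction += 1
    print(-correction)   # folded, sign-flipped correction in seconds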
def _clean(self, ar): # plot = self.configs.plot # if plot: # import matplotlib.pyplot as plt patient = ar.clone() patient.pscrunch() patient.remove_baseline() # Remove profile from dedispersed data patient.dedisperse() utils.print_info('Loading template', 2) data = patient.get_data().squeeze() if self.configs.template is None: # Sum over all axes except last, which is phase bins template = np.apply_over_axes(np.sum, data, tuple(range(data.ndim - 1))).squeeze() # smooth data template = savgol_filter(template, 5, 1) else: template_ar = psrchive.Archive_load(self.configs.template) template_ar.pscrunch() template_ar.remove_baseline() template_ar.dedisperse() if len(template_ar.get_frequencies()) > 1 and len( template_ar.get_frequencies()) < len( patient.get_frequencies()): utils.print_info( "Template channel number doesn't match data... f-scrunching!", 2) template_ar.fscrunch() template_data = template_ar.get_data().squeeze() template = np.apply_over_axes(np.sum, template_data, tuple(range(template_data.ndim - 1))).squeeze() # make sure template is 1D if len(np.shape(template)) > 1: # sum over frequencies too template_ar.fscrunch() utils.print_info( "2D template found. Assuming it has same frequency coverage and channels as data!", 2) template_phs = np.apply_over_axes( np.sum, template_data, tuple(range(template_data.ndim - 1))).squeeze() else: template_phs = template utils.print_info('Estimating template and profile phase offset', 2) if self.configs.template is None: phase_offset = 0 else: # Calculate phase offset of template in number of bins, using full obs # Get profile data of full obs profile = np.apply_over_axes(np.sum, data, tuple(range(data.ndim - 1))).squeeze() if np.shape(template_phs) != np.shape(profile): utils.print_info( 'Template and profile have different numbers of phase bins', 2) #err = (lambda (amp, phs, base): amp*clean_utils.fft_rotate(template_phs, phs) + base - profile) err = (lambda amp_phs: amp_phs[0] * clean_utils.fft_rotate( template_phs, amp_phs[1]) - profile) amp_guess = np.median(profile) / np.median(template_phs) phase_guess = -(np.argmax(profile) - np.argmax(template_phs)) amp_phs_guess = [amp_guess, phase_guess] #params, status = leastsq(err, [amp_guess, phase_guess, np.min(profile) - np.min(template_phs)]) params, status = leastsq(err, amp_phs_guess) phase_offset = params[1] utils.print_info( 'Template phase offset = {0}'.format(round(phase_offset, 3)), 2) utils.print_info('Removing profile from patient', 2) # if plot: # preop_patient = patient.clone() # preop_weights = preop_patient.get_weights() clean_utils.remove_profile_inplace(patient, template, phase_offset) utils.print_info('Accessing weights and applying to patient', 2) # re-set DM to 0 # patient.dededisperse() # Get weights weights = patient.get_weights() # Get data (select first polarization - recall we already P-scrunched) data = patient.get_data()[:, 0, :, :] data = clean_utils.apply_weights(data, weights) # if plot: # preop_data = preop_patient.get_data()[:,0,:,:] # preop_patient = [] # clear for the sake of memory # preop_data = clean_utils.apply_weights(preop_data, weights) # Mask profiles where weight is 0 mask_2d = np.bitwise_not(np.expand_dims(weights, 2).astype(bool)) mask_3d = mask_2d.repeat(ar.get_nbin(), axis=2) data = np.ma.masked_array(data, mask=mask_3d) # if plot: # preop_data = np.ma.masked_array(preop_data, mask=mask_3d) utils.print_info('Masking on-pulse region as determined from template', 2) # consider residual only in off-pulse region if len(np.shape(template)) > 1: # sum over 
frequencies utils.print_info( 'Estimating on-pulse region by f-scrunching 2D template', 2) template_ar.fscrunch() template_1D = np.apply_over_axes(np.sum, template_ar.get_data(), (0, 1)).squeeze() else: template_1D = template # Rotate template by apropriate amount template_rot = clean_utils.fft_rotate(template_1D, phase_offset).squeeze() # masked_template = np.ma.masked_greater(template_rot, np.min(template_rot) + 0.01*np.ptp(template_rot)) masked_template = np.ma.masked_greater(template_rot, np.median(template_rot)) masked_std = np.ma.std(masked_template) # use this std of masked data as cutoff masked_template = np.ma.masked_greater( template_rot, np.median(template_rot) + masked_std) # if plot: # plt.figure(figsize=(10, 5)) # plt.subplot(1, 2, 1) # plt.plot(np.apply_over_axes(np.sum, preop_data, tuple(range(data.ndim - 1))).squeeze(), alpha=1) # # Do fit again to scale template # subchan, err, params = clean_utils.remove_profile1d(np.apply_over_axes(np.sum, preop_data, (0, 1)).squeeze(), 0, 0, template_rot, 0, return_params=True) # # plt.plot(params[0]*template_rot + params[1], alpha=0.5) # # plt.plot(params[0]*masked_template + params[1], 'k') # plt.plot(params[0]*template_rot, alpha=0.5) # plt.plot(params[0]*masked_template, 'k') # plt.legend(('Pre-op data', 'Scaled and rotated template', 'Masked template')) # Loop through chans and subints to mask on-pulse phase bins for ii in range(0, np.shape(data)[0]): for jj in range(0, np.shape(data)[1]): data.mask[ii, jj, :] = masked_template.mask data = np.ma.masked_array(data, mask=data.mask) # if plot: # plt.subplot(1, 2, 2) # plt.plot(np.apply_over_axes(np.ma.sum, data, tuple(range(data.ndim - 1))).squeeze()) # plt.title("Residual data") # plt.savefig('data_and_template.png') utils.print_info( 'Calculating robust statistics to determine where RFI removal is required', 2) # RFI-ectomy must be recommended by average of tests # BWM: Ok, so this is where the magical stuff actually happens - need to know actually WHAT are the comprehensive stats # DJR: At this stage the stats are; (found to work well experimentally) # geometric mean, peak-to-peak, standard deviation, normaltest. # In original coast_guard they were; # mean, peak-to-peak, standard deviation, and max value of FFT avg_test_results = clean_utils.comprehensive_stats(data, axis=2, \ chanthresh=self.configs.chanthresh, \ subintthresh=self.configs.subintthresh, \ chan_order=self.configs.chan_order, \ chan_breakpoints=self.configs.chan_breakpoints, \ chan_numpieces=self.configs.chan_numpieces, \ subint_order=self.configs.subint_order, \ subint_breakpoints=self.configs.subint_breakpoints, \ subint_numpieces=self.configs.subint_numpieces, \ ) utils.print_info('Applying RFI masking weights to archive', 2) for (isub, ichan) in np.argwhere(avg_test_results >= 1): # Be sure to set weights on the original archive, and # not the clone we've been working with. integ = ar.get_Integration(int(isub)) integ.set_weight(int(ichan), 0.0) freq_fraczap = clean_utils.freq_fraczap(ar)