Example #1
def remove_bad_subints(infn, badsubints=None, badsubint_intervals=None):
    """Zero-weights bad subints.
        The file is modified in-place. However, zero-weighting 
        is used for trimming, so the process is reversible.

        Note: Subints are indexed starting at 0.

        Inputs:
            infn: name of file to remove subints from.
            badsubints: A list of subints to remove
            badsubint_intervals: A list of subint intervals
                (inclusive) to remove
    
        Outputs:
            None
    """
    if badsubints is None:
        badsubints = config.cfg.badsubints
    if badsubint_intervals is None:
        badsubint_intervals = config.cfg.badsubint_intervals

    zaplets = []
    if badsubints:
        zaplets.append("-w '%s'" % " ".join(['%d' % ww for ww in badsubints]))
    if badsubint_intervals:
        zaplets.extend(["-W '%d %d'" % lohi for lohi in badsubint_intervals])

    if zaplets:
        utils.print_info("Removing bad subints.", 2)
        utils.execute("paz -m %s %s" % (" ".join(zaplets), infn.fn))
Example #2
def get_obslog_entry(arf, tolerant=False):
    """Given an archive file, find the entry in the observing log.

        Inputs:
            arf: ArchiveFile object.
            tolerant: Be tolerant with name matching. 
                This is important for flux-cal observations.
                (Default: False)

        Output:
            obsinfo: A dictionary of observing information.
    """
    obsdt_utc, names = __prep_obslog_search(arf, tolerant)

    logentries = __obslog_db_match(obsdt_utc, names)
    if not logentries:
        utils.print_info(
            'No matches found in obslog DB. Searching text files.', 1)
        logentries = __obslog_file_match(obsdt_utc, names)

    if len(logentries) != 1:
        msg = "Bad number (%d) of entries " \
              "in obslog with correct source name (%s) " \
              "close to observation (%s) start time (UTC: %s)" % \
                    (len(logentries), arf['name'], arf.fn, obsdt_utc.strftime('%c'))
        if len(logentries) > 1:
            msg += ":\n%s" % \
                    "\n".join([pprint.pformat(entry) for entry in logentries])
        raise errors.HeaderCorrectionError(msg)
    return logentries[0]
Example #3
def main():
    if args.file_ids:
        rows = get_files_by_id(args.file_ids)
    else:
        rows = get_files(args.psrnames, args.type)
    info = {}

    utils.sort_by_keys(rows, args.sortkeys)
    for row in rows:
        if args.fmt is not None:
            print args.fmt.decode('string-escape') % row
        else:
            print os.path.join(row['filepath'], row['filename'])
            utils.print_info(
                "    File ID: %(file_id)d; "
                "Obs ID: %(obs_id)d; "
                "Status: %(status)s; "
                "Stage: %(stage)s; "
                "QC passed: %(qcpassed)s" % row, 2)
        info['Total'] = info.get('Total', 0) + 1
        info['QC Passed'] = info.get('QC Passed', 0) + int(
            bool(row['qcpassed']))
        info['Status %s' % row['status']] = \
            info.get('Status %s' % row['status'], 0)+1
        info['Stage %s' % row['stage']] = \
            info.get('Stage %s' % row['stage'], 0)+1
    utils.print_info(
        "Summary:\n    %s" %
        "\n    ".join(["%s: %d" % xx for xx in info.iteritems()]), 1)
Example #4
def prepare_subints(subdirs,
                    subints,
                    baseoutdir,
                    trimpcnt=6.25,
                    effix=False,
                    backend=None):
    """Prepare subints by
           - Copying them to the temporary working directory
           - De-weighting a percentage from each sub-band edge
           - Converting archive format to PSRFITS

        Inputs:
            subdirs: List of sub-band directories containing 
                sub-ints to combine
            subints: List of subint files to be combined.
                (NOTE: These are the file names only (i.e. no path).
                    Each file listed should appear in each of the
                    subdirs.)
            baseoutdir: Directory containing the sub-directories
                of prepared files.
            trimpcnt: Percentage (i.e. between 0 and 100) of the sub-band
                to trim from _each_ edge of the band.
                (Default: 6.25%)
            effix: Change observation site to eff_psrix to correct 
                for asterix clock offsets. (Default: False)
            backend: Name of the backend. (Default: leave as is)

        Outputs:
            prepsubdirs: The sub-directories containing prepared files.
    """
    devnull = open(os.devnull)
    tmpsubdirs = []
    for subdir in utils.show_progress(subdirs, width=50):
        freqdir = os.path.split(os.path.abspath(subdir))[-1]
        freqdir = os.path.join(baseoutdir, freqdir)
        try:
            os.makedirs(freqdir)
        except OSError:
            # Directory already exists
            pass
        fns = [os.path.join(subdir, fn) for fn in subints]
        preproc = 'convert psrfits'
        if effix:
            preproc += ',edit site=eff_psrix'
        if backend:
            if ("," in backend) or ("=" in backend) or (' ' in backend):
                raise errors.UnrecognizedValueError("Backend value (%s) is "
                                                    "invalid. It cannot "
                                                    "contain ',' or '=' or "
                                                    "' '" % backend)
            preproc += ',edit be:name=%s' % backend
        utils.execute(
            ['paz', '-j', preproc, '-E',
             '%f' % trimpcnt, '-O', freqdir] + fns,
            stderr=devnull)
        tmpsubdirs.append(freqdir)
    utils.print_info(
        "Prepared %d subint fragments in %d freq sub-dirs" %
        (len(subints), len(subdirs)), 3)
    return tmpsubdirs
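For reference, a sketch of the preprocessing job string assembled above (the backend name and directory names are hypothetical):

# With effix=True and backend='asterix' (hypothetical), the '-j' job becomes:
#   convert psrfits,edit site=eff_psrix,edit be:name=asterix
# and each group of sub-ints is trimmed and written to its frequency sub-directory:
#   paz -j '<preproc>' -E 6.25 -O <baseoutdir>/<freqdir> <subint files...>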
Example #5
    def __prune_band_edges(self, ar):
        """Prune the edges of the band. This is useful for
           removing channels where there is no response.
           The file is modified in-place. However, zero-weighting
           is used for pruning, so the process is reversible.

           Inputs:
               ar: The psrchive archive object to clean.

           Outputs:
               None
        """
        if self.configs.response is None:
            utils.print_info('No freq range specified for band pruning. Skipping...', 2)
        else:
            lofreq, hifreq = self.configs.response
            # Use absolute value in case band is flipped (BW<0)
            # bw = ar.get_bandwidth()  # assigned but never used
            nchan = ar.get_nchan()
            # chanbw = bw/nchan  # assigned but never used
            utils.print_info('Pruning frequency band to (%g-%g MHz)' % (lofreq, hifreq), 2)
            # Loop over channels
            for ichan in range(nchan):
                # Get profile for subint=0, pol=0
                prof = ar.get_Profile(0, 0, ichan)
                freq = prof.get_centre_frequency()
                if (freq < lofreq) or (freq > hifreq):
                    clean_utils.zero_weight_chan(ar, ichan)
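A quick illustration of the pruning rule above (the frequency limits are hypothetical):

# With self.configs.response = (1290.0, 1510.0), any channel whose centre
# frequency lies below 1290 MHz or above 1510 MHz is zero-weighted; channels
# inside the range are left untouched.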
Example #6
def combine_all(infns, outfn, expected_nsubbands=None):
    """Given a list of ArchiveFile objects group them into sub-bands
        then remove the edges of each sub-band to remove the artifacts
        caused by aliasing. Finally, combine the sub-bands into a single 
        output file.

        The combined sub-band files are not saved.

        Inputs:
            infns: A list of input ArchiveFile objects.
            outfn: The output file's name.
            expected_nsubbands: The expected number of subbands for each 
                subintegration.

        Outputs:
            combinedfns: A list of output (combined) files.
    """
    if expected_nsubbands is None:
        expected_nsubbands = config.cfg.expected_nsubbands

    infns = check_files(infns, expected_nsubbands=expected_nsubbands)
    groups = group_files(infns)
    combinedfiles = []
    # Combine files from the same sub-band in the time direction
    for group in groups:
        subbands = []
        for ctr_freq, to_combine in utils.group_by_ctr_freq(group).iteritems():
            utils.print_info("Combining %d subints at ctr freq %d MHz" % \
                                (len(to_combine), ctr_freq), 3)

            # Combine sub-integrations for this sub-band
            subfn = utils.get_outfn(outfn + ".%(freq)dMHz", to_combine[0])
            if subfn in [f.fn for f in subbands]:
                warnings.warn("'combined_all(...)' is overwritting files it " \
                                "previously created!")
            subband = combine_subints(to_combine, subfn)
            clean.trim_edge_channels(subband)
            subbands.append(subband)

        combinedfn = utils.get_outfn(outfn, subbands[0])
        utils.print_info("Combining %d subbands into %s" % \
                            (len(subbands), combinedfn), 3)
        if combinedfn in [f.fn for f in combinedfiles]:
            warnings.warn("'combined_all(...)' is overwritting files it " \
                            "previously created!")
        combinedfile = combine_subbands(subbands, combinedfn)
        combinedfiles.append(combinedfile)

        if not config.debug.INTERMEDIATE:
            # Remove the temporary combined files
            for sub in subbands:
                os.remove(sub.fn)
    return combinedfiles
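A hedged usage sketch for combine_all (the output-name template and resulting names are hypothetical; only the ".%(freq)dMHz" suffix comes from the code above):

# Hypothetical usage:
#   combined = combine_all(arfs, "%(name)s_%(yyyymmdd)s.cmb")
# Each sub-band group is first combined in time into a temporary file carrying
# a ".<freq>MHz" suffix, its edge channels are trimmed, and the sub-bands are
# then added into the final combined file (the temporaries are removed unless
# config.debug.INTERMEDIATE is set).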
Example #7
def remove_bad_channels(infn,
                        badchans=None,
                        badchan_intervals=None,
                        badfreqs=None,
                        badfreq_intervals=None):
    """Zero-weight bad channels and channels containing bad
        frequencies.
        The file is modified in-place. However, zero-weighting 
        is used for trimming, so the process is reversible.

        Note: Channels are indexed starting at 0.

        Inputs:
            infn: name of file to remove channels from.
            badchans: A list of channels to remove 
            badchan_intervals: A list of channel intervals 
                (inclusive) to remove
            badfreqs: A list of frequencies. The channels
                containing these frequencies will be removed.
            badfreq_intervals: A list of frequency ranges 
                to remove. The channels containing these
                frequencies will be removed.
    
        Outputs:
            None
    """
    if badchans is None:
        badchans = config.cfg.badchans
    if badchan_intervals is None:
        badchan_intervals = config.cfg.badchan_intervals
    if badfreqs is None:
        badfreqs = config.cfg.badfreqs
    if badfreq_intervals is None:
        badfreq_intervals = config.cfg.badfreq_intervals

    zaplets = []
    if badchans:
        zaplets.append("-z '%s'" % " ".join(['%d' % zz for zz in badchans]))
    if badchan_intervals:
        zaplets.extend(["-Z '%d %d'" % lohi for lohi in badchan_intervals])
    if badfreqs:
        zaplets.append("-f '%s'" % " ".join(['%f' % ff for ff in badfreqs]))
    if badfreq_intervals:
        zaplets.extend(["-F '%f %f'" % lohi for lohi in badfreq_intervals])

    if zaplets:
        utils.print_info("Removing bad channels.", 2)
        utils.execute("paz -m %s %s" % (" ".join(zaplets), infn.fn))
Example #8
def prune_band(infn, response=None):
    """Prune the edges of the band. This is useful for
        removing channels where there is no response.
        The file is modified in-place. However, zero-weighting 
        is used for pruning, so the process is reversible.

        Inputs:
            infn: name of file to trim.
            response: A tuple specifying the range of frequencies;
                channels outside of this range will be de-weighted.

        Outputs:
            None
    """
    if response is None:
        response = config.cfg.rcvr_response_lims

    if response is None:
        utils.print_info(
            "No freq range specified for band pruning. Skipping...", 2)
    else:
        # Use absolute value in case band is flipped (BW<0)
        lofreq = infn['freq'] - np.abs(0.5 * infn['bw'])
        hifreq = infn['freq'] + np.abs(0.5 * infn['bw'])
        utils.print_info("Pruning frequency band to (%g-%g MHz)" % response, 2)
        utils.print_debug("Archive's freq band (%g-%g MHz)" % \
                            (lofreq, hifreq), 'clean')
        pazcmd = 'paz -m %s ' % infn.fn
        runpaz = False  # Only run paz if either of the following clauses is True
        if response[0] > lofreq:
            # Part of archive's low freqs are outside rcvr's response
            pazcmd += '-F "%f %f" ' % (lofreq, response[0])
            runpaz = True
        if response[1] < hifreq:
            # Part of archive's high freqs are outside rcvr's response
            pazcmd += '-F "%f %f" ' % (response[1], hifreq)
            runpaz = True
        if runpaz:
            utils.execute(pazcmd)
        else:
            warnings.warn("Not pruning band edges! All data are " \
                            "within the receiver's response.", \
                            errors.CoastGuardWarning)
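A worked example of the edge arithmetic above (header values and response limits are assumed):

# Suppose infn['freq'] = 1400.0 MHz and infn['bw'] = 200.0 MHz (band 1300-1500 MHz),
# and response = (1320.0, 1480.0). Both clauses fire, so the command is roughly:
#   paz -m <infn.fn> -F "1300.000000 1320.000000" -F "1480.000000 1500.000000"
# i.e. only the 20 MHz outside the receiver response at each edge is de-weighted.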
Example #9
def get_files_to_combine(rows, max_span=1, min_snr=0):
    """Given a list of result sets from the database return a list of
        filenames to combine to make a template.

        Inputs:
            rows: A list of database result sets as returned by
                'get_files()'.
            max_span: The maximum allowable span, in days, from the 
                first data file to the last data file to combine. 
                (Default: 1 day)
            min_snr: Ignore data files with SNR lower than this value.
                (Default: 0)

        Output:
            files: A list of file names to combine.
    """
    utils.sort_by_keys(rows, ['start_mjd'])
    info = []
    for ii, row in enumerate(rows):
        jj = ii
        tot = 0
        for jj in range(ii, len(rows)):
            if (rows[jj]['start_mjd'] - row['start_mjd']) > max_span:
                break
            snr = (rows[jj]['snr']
                   or 0)  # This will replace None values with 0
            if snr >= min_snr:
                tot += snr
            jj += 1
        info.append((ii, tot, jj - ii))
    if not info:
        return []
    ind, snr, nn = max(info, key=lambda aa: aa[1])
    utils.print_info(
        "Highest total SNR is %g for %d files starting "
        "at index %d." % (snr, nn, ind), 2)
    touse = rows[ind:ind + nn]
    utils.sort_by_keys(touse, ['snr_r'])
    return [
        os.path.join(rr['filepath'], rr['filename']) for rr in touse
        if (rr['snr'] or 0) >= min_snr
    ]
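A small worked example of the window selection above (rows and SNR values are hypothetical):

# rows sorted by 'start_mjd': mjd=100.0 snr=20, mjd=100.5 snr=None, mjd=100.9 snr=15, mjd=102.0 snr=50
# With max_span=1 and min_snr=0:
#   the window starting at index 0 spans MJD 100.0-100.9 -> total SNR 35 (None counts as 0)
#   the window starting at index 3 contains only the last file -> total SNR 50
# The window with the highest total wins, so only the last file is returned.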
Example #10
def clean_archive(inarf, outfn, clean_re=None, *args, **kwargs):
    import psrchive  # Temporarily, because python bindings
    # are not available on all computers

    if clean_re is None:
        clean_re = config.cfg.clean_strategy
    try:
        outfn = utils.get_outfn(outfn, inarf)
        shutil.copy(inarf.fn, outfn)

        outarf = utils.ArchiveFile(outfn)

        trim_edge_channels(outarf)
        prune_band(outarf)
        remove_bad_channels(outarf)
        remove_bad_subints(outarf)

        matching_cleaners = [
            clnr for clnr in cleaners
            if clean_re and re.search(clean_re, clnr)
        ]
        if len(matching_cleaners) == 1:
            ar = psrchive.Archive_load(outarf.fn)
            cleaner = eval(matching_cleaners[0])
            utils.print_info(
                "Cleaning using '%s(...)'." % matching_cleaners[0], 2)
            cleaner(ar, *args, **kwargs)
            ar.unload(outfn)
        elif len(matching_cleaners) == 0:
            utils.print_info("No cleaning strategy selected. Skipping...", 2)
        else:
            raise errors.CleanError("Bad cleaner selection. " \
                                    "'%s' has %d matches." % \
                                    (clean_re, len(matching_cleaners)))
    except:
        # An error prevented cleaning from being successful
        # Remove the output file because it may confuse the user
        if os.path.exists(outfn):
            os.remove(outfn)
        raise
    return outarf
Example #11
def main():
    rows = get_files(args.psrnames, retry=args.retry)
    info = {}

    psrnameset = set([row['sourcename'] for row in rows])
    utils.sort_by_keys(rows, args.sortkeys)
    db = database.Database()
    with db.transaction() as conn:
        for row in rows:
            if row['obstype'] == 'pulsar':
                calscans = reduce_data.get_potential_polcal_scans(
                    db, row['obs_id'])
                cancal = bool(calscans)
            sys.stdout.write(args.fmt.decode('string-escape') % row)
            if row['obstype'] == 'pulsar':
                sys.stdout.write("\t%s\n" % cancal)
                utils.print_info(
                    "Number of potential calibrator scans: %d" % len(calscans),
                    1)
                msg = "    %s" % "\n    ".join([
                    "Obs ID: %d; File ID: %d; %s" %
                    (calrow['obs_id'], calrow['file_id'], calrow['filename'])
                    for calrow in calscans if type(calrow) is not str
                ])
                utils.print_info(msg, 2)
            else:
                sys.stdout.write("\n")
            if args.retry:
                for desc in reduce_data.get_all_descendents(
                        row['file_id'], db):
                    if (desc['status'] == 'failed') and (desc['stage']
                                                         == 'calibrated'):
                        # File has been calibrated, but it failed. Do not retry.
                        cancal = False
                        utils.print_info(
                            "Calibration of file %d has previously failed. Will _not_ retry."
                            % row['file_id'], 1)
                if (cancal and (row['status'] != 'failed')) or (
                        not cancal and (row['status'] == 'calfail')):
                    retry(db, row['file_id'])
                    utils.print_info(
                        "Will retry calibration of file %d" % row['file_id'],
                        1)
        if args.retry:
            for name in psrnameset:
                try:
                    reduce_data.reattempt_calibration(db, name)
                    calibrate.update_caldb(db, name, force=True)
                except:
                    pass
Example #12
    def __trim_edge_channels(self, ar):
        """Trim the edge channels of an input file to remove
           band-pass roll-off and the effect of aliasing.
           The file is modified in-place. However, zero-weighting
           is used for trimming, so the process is reversible.

           Inputs:
               ar: The psrchive archive object to clean.

           Outputs:
               None
        """
        nchan = ar.get_nchan()
        bw = float(ar.get_bandwidth())
        num_to_trim = max(self.configs.trimnum,
                          int(self.configs.trimfrac * nchan + 0.5),
                          int(self.configs.trimbw / bw * nchan + 0.5))
        if num_to_trim > 0:
            utils.print_info('Trimming %d channels from each band-edge.' % num_to_trim, 2)
            for ichan in range(num_to_trim):
                clean_utils.zero_weight_chan(ar, ichan)  # trim at beginning
                clean_utils.zero_weight_chan(ar, nchan - ichan - 1)  # trim at end
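A worked example of the trimming formula above (the configuration values are assumed):

# With nchan=512, bw=200.0 MHz, trimnum=0, trimfrac=0.05 and trimbw=20.0 MHz:
#   num_to_trim = max(0, int(0.05*512 + 0.5), int(20.0/200.0*512 + 0.5))
#               = max(0, 26, 51) = 51
# so 51 channels are zero-weighted at each edge of the band.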
Example #13
def group_subband_dirs(subdirs, maxspan=None, maxgap=None, \
            tossfrac=None, filetype='subint'):
    """Based on file names group sub-ints from different
        sub-bands. Each subband is assumed to be in a separate
        directory.

        Inputs:
            subdirs: List of sub-band directories
            maxspan: Maximum span, in seconds, between first and 
                last sub-int in a combined file.
            maxgap: Maximum gap, in seconds, permitted before 
                starting a new output file.
            tossfrac: Fraction of sub-ints required for a 
                sub-band to be combined. If a sub-band has
                fewer than tossfrac*N_subint sub-ints it
                will be excluded.
            filetype: Type of files being grouped. Can be 'subint',
                or 'single'. (Default: 'subint')

        Outputs:
            usedirs: List of directories to use when combining.
                (NOTE: This may be different than the input
                    'subdirs' because some directories may have
                    too few subints to be worth combining. This
                    depends on the input value of 'tossfrac'.)
            groups: List of groups of files to be combined.
                (NOTE: These are the file names only (i.e. no path).
                    Each file listed appears in each of 'usedirs'.)
    """
    if maxspan is None:
        maxspan = config.cfg.combine_maxspan
    if maxgap is None:
        maxgap = config.cfg.combine_maxgap
    if tossfrac is None:
        tossfrac = 1 - config.cfg.missing_subint_tolerance

    if filetype not in FILETYPE_SPECIFICS:
        raise errors.InputError("File type (%s) is not recognized. " \
                                "Possible values are: '%s'" % \
                            (filetype, "', '".join(FILETYPE_SPECIFICS.keys())))
    else:
        globpat, get_start = FILETYPE_SPECIFICS[filetype]

    # Ensure paths are absolute
    subdirs = [os.path.abspath(path) for path in subdirs]
    utils.print_debug("Grouping subints from %d sub-band directories" % \
                        len(subdirs), 'combine')

    nindirs = len(subdirs)
    nsubbands = len(subdirs)
    nperdir = collections.Counter()
    noccurs = collections.Counter()
    nintotal = 0
    for subdir in subdirs:
        fns = glob.glob(os.path.join(subdir, globpat))
        nn = len(fns)
        utils.print_debug("Found %d sub-int files in %s" % \
                            (nn, subdir), 'combine')
        nintotal += nn
        nperdir[subdir] = nn
        noccurs.update([os.path.basename(fn) for fn in fns])
    nsubints = len(noccurs)

    # Remove sub-bands that have too few subints
    thresh = tossfrac * nsubints
    for ii in xrange(len(subdirs) - 1, -1, -1):
        subdir = subdirs[ii]
        if nperdir[subdir] < thresh:
            utils.print_info("Ignoring sub-ints from %s. " \
                    "It has too few sub-ints (%d < %d; tossfrac: %f)" % \
                    (subdir, nperdir[subdir], thresh, tossfrac), 2)
            subdirs.pop(ii)
            del nperdir[subdir]

            fns = glob.glob(os.path.join(subdir, globpat))
            noccurs.subtract([os.path.basename(fn) for fn in fns])
            nsubbands -= 1

    # Remove subints that are no longer included in any subbands
    to_del = []
    for fn in noccurs:
        if not noccurs[fn]:
            to_del.append(fn)
    for fn in to_del:
        del noccurs[fn]

    # Now combine subints
    lastsubint = datetime.datetime.min
    filestart = datetime.datetime.min
    groups = []
    if nsubbands:
        for subint in sorted(noccurs):
            if noccurs[subint] < nsubbands:
                utils.print_info("Ignoring sub-int (%s). It doesn't apear in all " \
                                "subbands (only %d of %d)" % \
                                (subint, noccurs[subint], nsubbands), 2)
                continue
            start = get_start(os.path.join(subdirs[0], subint))
            if ((start - filestart).total_seconds() > maxspan) or \
                        ((start - lastsubint).total_seconds() > maxgap):
                filestart = start
                utils.print_debug("Starting a new file at %s" % \
                        filestart, 'combine')
                # Start a new file
                groups.append([])
            groups[-1].append(subint)
            lastsubint = start
    nused = sum([len(grp) for grp in groups])
    utils.print_info("Grouped %d files from %d directories into %d groups.\n" \
                     "(Threw out %d directories and %d files)" % \
                     (nintotal, nindirs, len(groups), nindirs-len(subdirs), \
                        nintotal-nused), 2)
    return subdirs, groups
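An illustration of the grouping rule above (start times and limits are hypothetical):

# With maxspan=3600 s and maxgap=300 s, sub-ints starting at 00:00, 00:02,
# 00:04 and 00:20 split into two groups: the 16-minute gap before the last
# sub-int exceeds maxgap, so it starts a new output file.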
Example #14
    def run(self, ar):
        utils.print_info(
            "Cleaning '%s' with %s" % (ar.get_filename(), self.name), 1)
        utils.print_debug("Cleaning parameters: %s" % self.get_config_string(),
                          'clean')
        self._clean(ar)
Example #15
def main():
    psrname = utils.get_prefname(args.psrname)

    if args.nchan == 1:
        ext = '.FTp'
        scrunchargs = ['-F']
    elif args.nchan > 1:
        ext = '.Tp.F%d' % args.nchan
        scrunchargs = ['--setnchn', '%d' % args.nchan]
    else:
        raise ValueError("Cannot scrunch using negative number of "
                         "channels (nchan=%d)" % args.nchan)

    #psrdirs = dict([(utils.get_prefname(os.path.basename(dd)),
    #                 os.path.basename(dd))
    #                for dd in glob.glob(os.path.join(PARFILE_DIR, '*'))
    #                if os.path.isdir(dd)])

    #if psrname in psrdirs:
    #    legacydir = os.path.join('/homes/plazarus/research/epta-legacy/',
    #                             psrdirs[psrname])
    #else:
    #    legacydir = None

    # Copy EPTA legacy TOAs
    #if legacydir and not os.path.exists("epta-legacy"):
    #    os.mkdir("epta-legacy")
    #    shutil.copytree(os.path.join(legacydir, "tims"), "epta-legacy/tims")
    #    shutil.copy(os.path.join(legacydir,
    #                             "%s_t2noise.model" % psrdirs[psrname]),
    #                "epta-legacy")

    # Find parfile
    if args.parfile is not None:
        if not os.path.exists(args.parfile):
            raise errors.InputError("Parfile specified (%s) doesn't exist!" %
                                    args.parfile)
        inparfn = args.parfile
    else:
        # Create parfile
        #inparfn = os.path.join('/homes/plazarus/research/epta-legacy/',
        #                       psrdirs[psrname], "%s.par" % psrdirs[psrname])
        inparfn = reduce_data.PARFILES[psrname]
    #intimfn = os.path.join('/homes/plazarus/research/epta-legacy/',
    #                       psrdirs[psrname], "%s_all.tim" % psrdirs[psrname])

    outparfn = "%s.T2.par" % psrname
    with open(inparfn, 'r') as inff, open(outparfn, 'w') as outff:
        for line in inff:
            # Don't copy over JUMPs or EFACs to 'outff'
            if not line.startswith("JUMP") and \
                    not 'EFAC' in line:
                outff.write(line)
        outff.write("\n".join(EXTRA_PARFILE_LINES))

    template_dir = os.path.join(BASE_TEMPLATE_DIR, psrname)
    for stage in STAGES:
        if stage == "current":
            continue
        for rcvr in RCVRS:
            template_name = "%s_%s_%s.std" % (psrname, rcvr, stage)
            # First, check if templates exists
            if not os.path.isfile(os.path.join(template_dir, template_name)):
                # Make template
                utils.print_info("No template (%s) found!" % template_name, 1)
                try:
                    os.makedirs(template_dir)
                except:
                    pass
                try:
                    print psrname, stage, rcvr
                    stdfn = make_template.make_template(
                        template_dir, psrname, stage, rcvr)
                    utils.print_info("Made template: %s" % stdfn, 1)
                except errors.TemplateGenerationError:
                    pass

    timfns = []
    for stage in STAGES:
        # List files to reduce
        rows = list_files.get_files([psrname], stage)
        print len(rows)
        fns = {}
        # Initialize list of file names for each receiver
        for rcvr in RCVRS:
            fns[rcvr] = []
        for row in rows:
            if row['stage'] not in ('cleaned', 'calibrated'):
                continue
            fn = os.path.join(row['filepath'], row['filename'])
            fns[row['rcvr']].append(fn)
        stagetimfn = "%s_%s.tim" % (psrname, stage)
        print "Opening %s" % stagetimfn
        stagetimff = open(stagetimfn, 'w')
        # Create file listings and generate TOAs
        for rcvr in RCVRS:
            print rcvr, len(fns[rcvr])
            if not fns[rcvr]:
                # No files
                continue
            # Check for existing scrunched files
            toscrunch = []
            scrunchedfns = []
            scrunchdir = os.path.join("scrunched", rcvr)
            for fn in fns[rcvr]:
                scrunchfn = os.path.join(scrunchdir,
                                         os.path.basename(fn) + ext)
                scrunchedfns.append(scrunchfn)
                if not os.path.exists(scrunchfn):
                    toscrunch.append(fn)
            # Scrunch files
            try:
                os.makedirs(scrunchdir)
            except:
                pass
            print "Working on %s %s" % (rcvr, stage)
            for fn in utils.show_progress(toscrunch, width=50):
                # Create a copy of the file with the 'eff_psrix' site
                cmd = ['psredit', '-c', 'site=eff_psrix', '-O', scrunchdir, fn]
                cmd.extend(['-e', fn.split('.')[-1] + ext])
                utils.execute(cmd)
                arfn = os.path.join(scrunchdir, os.path.basename(fn + ext))
                parfn = utils.get_norm_parfile(arfn)
                # Re-install ephemeris
                cmd = ['pam', '-Tp', '-E', parfn, '-m', arfn] + scrunchargs
                utils.execute(cmd)

            toas = []
            mjds = []
            for row in rows:
                if row['rcvr'] != rcvr:
                    continue
                if row['stage'] not in ('cleaned', 'calibrated'):
                    continue
                template_name = "%s_%s_%s.std" % (psrname, rcvr, row['stage'])
                template = os.path.join(template_dir, template_name)
                # Generate TOAs
                fn = os.path.join(scrunchdir, row['filename']) + ext
                print fn
                stdout, stderr = utils.execute([
                    "pat", "-T", "-A", "FDM", "-f", "tempo2", "-C",
                    "rcvr chan", "-d", "-s", template, fn
                ])
                # Parse TOAs
                toalines = stdout.split('\n')
                for line in toalines:
                    toainfo = readers.tempo2_reader(line)
                    if toainfo is not None:
                        # Formatter expects 'file' field to be called 'rawfile'
                        toainfo['rawfile'] = toainfo['file']
                        toainfo['telescope_code'] = toainfo['telescope']
                        toainfo['type'] = stage
                        toainfo['rcvr'] = rcvr
                        toainfo['file_id'] = row['file_id']
                        toainfo['obs_id'] = row['obs_id']
                        toainfo['shortstage'] = row['stage'][:2].upper()
                        if row['stage'] == 'cleaned':
                            toainfo['grp'] = "%s_clean" % rcvr
                        else:
                            toainfo['grp'] = "%s_cal" % rcvr
                        toainfo['chan'] = toainfo['extras']['chan']
                        toas.append(toainfo)
                        mjds.append(toainfo['imjd'])
            # Sort TOAs
            utils.sort_by_keys(toas, ['fmjd', 'imjd'])

            # Format timfile
            sysflag = 'EFF.AS.%(rcvr)s.%(shortstage)s'
            timlines = formatters.tempo2_formatter(toas,
                                                   flags=[
                                                       ('rcvr', '%(rcvr)s'),
                                                       ('type', '%(type)s'),
                                                       ('grp', '%(grp)s'),
                                                       ('sys', sysflag),
                                                       ('obsid', '%(obs_id)d'),
                                                       ('fileid',
                                                        '%(file_id)d'),
                                                       ('chan', '%(chan)s')
                                                   ])

            mjds.sort()
            #offsetmjds = sorted(TIME_OFFSETS.keys())
            #inds = np.searchsorted(mjds, offsetmjds)+1
            # Insert extra lines from back of list
            #for ind, key in reversed(zip(inds, offsetmjds)):
            #    timlines[ind:ind] = ["\n"+TIME_OFFSETS[key]+"\n"]

            # Write out timfile
            timfn = "%s_%s_%s.tim" % (psrname, rcvr, stage)
            with open(timfn, 'w') as ff:
                for line in timlines:
                    ff.write(line + "\n")
            utils.print_info("Wrote out timfile: %s" % timfn)
            timfns.append(timfn)
            stagetimff.write("INCLUDE %s\n" % timfn)
        stagetimff.close()

    #outtimfn = os.path.join("epta-legacy", os.path.basename(intimfn))
    #with open(intimfn, 'r') as inff, open(outtimfn, 'w') as outff:
    #    for line in inff:
    #        outff.write(line)
    #    for rcvr in RCVRS:
    #        timfn = "%s_%s_cleaned.tim" % (psrname, rcvr)
    #        if os.path.exists(timfn):
    #            outff.write("INCLUDE ../%s\n" % timfn)

    # Count TOAs
    #toas = load_toa.parse_timfile(outtimfn, determine_obssystem=False)
    systems = {}
    #for toa in toas:
    #    if toa['is_bad']:
    #        continue
    #    if not 'sys' in toa['extras']:
    #        print toa
    #    else:
    #        nn = systems.get(toa['extras']['sys'], 0)
    #        systems[toa['extras']['sys']] = nn+1

    outparfn = "%s.T2.par" % psrname
    #outparfn2 = os.path.join("epta-legacy", os.path.basename(inparfn))
    with open(inparfn, 'r') as inff, open(outparfn, 'w') as outff:  #, \
        #open(outparfn2, 'w') as outff2:
        for line in inff:
            # Don't copy over JUMPs or EFACs to 'outff'
            # Copy JUMPs and EFACs to 'outff2' and fit
            #if line.startswith("JUMP"):
            #    if "-sys" in line:
            #        obssys = line.split()[2]
            #        if systems.get(obssys, 0):
            #            # Observing system has TOAs
            #            # Replace all system jumps by 0 and set the fit flag
            #            outff2.write(" ".join(line.split()[:3])+" 0 1\n")
            #    else:
            #        outff2.write(line)
            #elif line.startswith("T2EFAC"):
            #    outff2.write(line)
            #elif line.startswith("NITS"):
            #    pass
            #else:
            outff.write(line)
            # Remove fit-flags for 'outff2'
            #outff2.write(" ".join(line.split()[:2])+'\n')
        outff.write("\n".join(EXTRA_PARFILE_LINES))
        #outff2.write("\n".join(["JUMP -sys EFF.AS.%s.CL 0 1" % rcvr for rcvr in RCVRS]))
        #outff2.write("\nNITS 3\n")

    # Create a master timfile
    master_timfn = "%s_all.tim" % psrname
    with open(master_timfn, 'w') as ff:
        for timfn in timfns:
            ff.write("INCLUDE %s\n" % timfn)
    utils.print_info("Wrote out master timfile: %s" % master_timfn)
Example #16
def update_caldb(db, sourcename, force=False):
    """Check for new calibrator scans. If found update the calibrator database.

        Inputs:
            db: A Database object.
            sourcename: The name of the source to match.
                (NOTE: '_R' will be removed from the sourcename, if present)
            force: Forcefully update the caldb
        
        Outputs:
            caldb: The path to the updated caldb.
    """
    name = utils.get_prefname(sourcename)
    if name.endswith('_R'):
        name = name[:-2]

    # Get the caldb
    caldb = get_caldb(db, name)
    if caldb is None:
        lastupdated = datetime.datetime.min
        outdir = os.path.join(config.output_location, 'caldbs')
        try:
            os.makedirs(outdir)
        except OSError:
            # Directory already exists
            pass
        outfn = '%s.caldb.txt' % name.upper()
        outpath = os.path.join(outdir, outfn)
        insert_new = True
        values = {'sourcename': name,
                  'caldbpath': outdir,
                  'caldbname': outfn}
    else:
        lastupdated = caldb['last_modified']
        outpath = os.path.join(caldb['caldbpath'], caldb['caldbname'])
        insert_new = False
        values = {}

    with db.transaction() as conn:
        if not insert_new:
            # Mark update of caldb as in-progress
            update = db.caldbs.update().\
                        values(status='updating',
                                last_modified=datetime.datetime.now()).\
                        where(db.caldbs.c.caldb_id == caldb['caldb_id'])
            conn.execute(update)

        select = db.select([db.files],
                    from_obj=[db.files.\
                        outerjoin(db.obs,
                            onclause=db.files.c.obs_id ==
                                    db.obs.c.obs_id)]).\
                    where((db.files.c.status.in_(['new', 'done'])) &
                            (db.files.c.stage == 'calibrated') &
                            (db.obs.c.obstype == 'cal') & 
                            (db.obs.c.sourcename == ('%s_R' % name)))
        results = conn.execute(select)
        rows = results.fetchall()
        results.close()

        numnew = 0
        for row in rows:
            if row['added'] > lastupdated:
                numnew += 1

        utils.print_info("Found %d suitable calibrators for %s. "
                         "%d are new." %
                         (len(rows), name, numnew), 2)

        values['numentries'] = len(rows)

        try:
            if numnew or force:
                # Create an updated version of the calibrator database 
                basecaldir = os.path.join(config.output_location,
                                            name.upper()+"_R")
                utils.execute(['pac', '-w', '-u', '.pcal.T', '-k', outpath],
                                dir=basecaldir)
        except:
            #raise
            values['status'] = 'failed'
            if insert_new:
                action = db.caldbs.insert()
            else:
                action = db.caldbs.update().\
                            values(note='%d new entries added' % numnew,
                                    last_modified=datetime.datetime.now()).\
                            where(db.caldbs.c.caldb_id == caldb['caldb_id'])
            conn.execute(action, values)
        else:
            if insert_new:
                action = db.caldbs.insert()
            else:
                action = db.caldbs.update().\
                            values(status='ready',
                                    note='%d new entries added' % numnew,
                                    last_modified=datetime.datetime.now()).\
                            where(db.caldbs.c.caldb_id == caldb['caldb_id'])
            conn.execute(action, values)
    return outpath
Example #17
def deep_clean(toclean, chanthresh=None, subintthresh=None, binthresh=None):
    import psrchive  # Temporarily, because python bindings
    # are not available on all computers

    if chanthresh is None:
        chanthresh = config.cfg.clean_chanthresh
    if subintthresh is None:
        subintthresh = config.cfg.clean_subintthresh
    if binthresh is None:
        binthresh = config.cfg.clean_binthresh

    ar = toclean.clone()

    ar.pscrunch()
    ar.remove_baseline()
    ar.dedisperse()

    # Remove profile
    data = ar.get_data().squeeze()
    template = np.apply_over_axes(np.sum, data, (0, 1)).squeeze()
    clean_utils.remove_profile_inplace(ar, template, None)

    ar.dededisperse()

    # First clean channels
    chandata = clean_utils.get_chans(ar, remove_prof=True)
    chanweights = clean_utils.get_chan_weights(ar).astype(bool)
    chanmeans = clean_utils.scale_chans(chandata.mean(axis=1),
                                        chanweights=chanweights)
    chanmeans /= clean_utils.get_robust_std(chanmeans, chanweights)
    chanstds = clean_utils.scale_chans(chandata.std(axis=1),
                                       chanweights=chanweights)
    chanstds /= clean_utils.get_robust_std(chanstds, chanweights)

    badchans = np.concatenate((np.argwhere(np.abs(chanmeans) >= chanthresh), \
                                    np.argwhere(np.abs(chanstds) >= chanthresh)))
    badchans = np.unique(badchans)
    utils.print_info(
        "Number of channels to be de-weighted: %d" % len(badchans), 2)
    for ichan in badchans:
        utils.print_info("De-weighting chan# %d" % ichan, 3)
        clean_utils.zero_weight_chan(ar, ichan)
        clean_utils.zero_weight_chan(toclean, ichan)

    # Next clean subints
    subintdata = clean_utils.get_subints(ar, remove_prof=True)
    subintweights = clean_utils.get_subint_weights(ar).astype(bool)
    subintmeans = clean_utils.scale_subints(subintdata.mean(axis=1), \
                                    subintweights=subintweights)
    subintmeans /= clean_utils.get_robust_std(subintmeans, subintweights)
    subintstds = clean_utils.scale_subints(subintdata.std(axis=1), \
                                    subintweights=subintweights)
    subintstds /= clean_utils.get_robust_std(subintstds, subintweights)

    badsubints = np.concatenate((np.argwhere(np.abs(subintmeans) >= subintthresh), \
                                    np.argwhere(np.abs(subintstds) >= subintthresh)))

    if config.debug.CLEAN:
        plt.subplots_adjust(hspace=0.4)
        chanax = plt.subplot(4, 1, 1)
        plt.plot(np.arange(len(chanmeans)), chanmeans, 'k-')
        plt.axhline(chanthresh, c='k', ls='--')
        plt.axhline(-chanthresh, c='k', ls='--')
        plt.xlabel('Channel Number', size='x-small')
        plt.ylabel('Average', size='x-small')

        plt.subplot(4, 1, 2, sharex=chanax)
        plt.plot(np.arange(len(chanstds)), chanstds, 'k-')
        plt.axhline(chanthresh, c='k', ls='--')
        plt.axhline(-chanthresh, c='k', ls='--')
        plt.xlabel('Channel Number', size='x-small')
        plt.ylabel('Standard Deviation', size='x-small')

        subintax = plt.subplot(4, 1, 3)
        plt.plot(np.arange(len(subintmeans)), subintmeans, 'k-')
        plt.axhline(subintthresh, c='k', ls='--')
        plt.axhline(-subintthresh, c='k', ls='--')
        plt.xlabel('Sub-int Number', size='x-small')
        plt.ylabel('Average', size='x-small')

        plt.subplot(4, 1, 4, sharex=subintax)
        plt.plot(np.arange(len(subintstds)), subintstds, 'k-')
        plt.axhline(subintthresh, c='k', ls='--')
        plt.axhline(-subintthresh, c='k', ls='--')
        plt.xlabel('Sub-int Number', size='x-small')
        plt.ylabel('Standard Deviation', size='x-small')
        plt.show()

    badsubints = np.unique(badsubints)
    utils.print_info(
        "Number of sub-ints to be de-weighted: %d" % len(badsubints), 2)
    for isub in badsubints:
        utils.print_info("De-weighting subint# %d" % isub, 3)
        clean_utils.zero_weight_subint(ar, isub)
        clean_utils.zero_weight_subint(toclean, isub)

    # Re-dedisperse the data
    ar.dedisperse()

    # Now replace hot bins
    utils.print_info("Will find and clean 'hot' bins", 2)
    clean_utils.clean_hot_bins(toclean, thresh=binthresh)
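A small illustration of how the channel outlier test above combines the two statistics (the channel indices are hypothetical):

# If np.abs(chanmeans) >= chanthresh at channels [3, 17] and
#    np.abs(chanstds)  >= chanthresh at channels [17, 42], then
#   badchans = np.unique(<concatenated indices>) = [3, 17, 42]
# and those three channels are zero-weighted in both 'ar' and 'toclean'.
# The same pattern is applied to sub-ints using 'subintthresh'.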
Example #18
    def run(self, ar):
        utils.print_info(
            'Cleaning %s with %s' % (ar.get_filename(), self.name), 1)
        self._clean(ar)
Example #19
def main():
    db = database.Database()

    obs_id = args.obs_id
    obsinfo = get_obsinfo(db, obs_id)
    datestr = utils.mjd_to_datetime(obsinfo['start_mjd']).strftime("%Y%m%d")
    subdirs = [
        datetime.datetime.now().strftime("%Y%m%dT%H:%M:%S"), datestr,
        obsinfo['sourcename']
    ]
    subdirs.reverse()
    backupdir = os.path.join(config.output_location, "removed", *subdirs)
    print "Will remove database entries for obs ID %d" % obs_id
    print "Back-ups of existing files will be copied to %s" % backupdir

    log_ids, logfns = get_loginfo(db, obs_id)
    assert len(log_ids) == len(logfns)
    print "Will also remove %d logs" % len(log_ids)
    tmp = "\n".join(["Log ID: %d; %s" % xx for xx in zip(log_ids, logfns)])
    utils.print_info(tmp, 1)

    rows = get_fileinfo(db, obs_id)
    file_ids = [
        row['file_id'] for row in rows if not ((row['stage'] == 'grouped') or (
            (row['stage'] == 'combined') and (not row['is_deleted'])))
    ]
    file_ids_left = [
        row['file_id'] for row in rows if row['file_id'] not in file_ids
    ]
    fns = [
        os.path.join(row['filepath'], row['filename']) for row in rows
        if row['file_id'] in file_ids
    ]
    print "Will also remove %d files" % len(rows)
    tmp = "\n".join(["File ID: %d; %s" % xx for xx in zip(file_ids, fns)])
    utils.print_info(tmp, 1)

    diag_ids, diagfns = get_diaginfo(db, file_ids)
    assert len(diag_ids) == len(diagfns)
    print "Will also remove %d diagnostics" % len(diag_ids)
    tmp = "\n".join(
        ["Diagnostic ID: %d; %s" % xx for xx in zip(diag_ids, diagfns)])
    utils.print_info(tmp, 1)

    qctrl_ids = get_qcinfo(db, file_ids)
    print "Will also remove %d quality control entries" % len(qctrl_ids)
    tmp = "\n".join(["QC ID: %d" % xx for xx in qctrl_ids])
    utils.print_info(tmp, 1)

    reatt_ids = get_reattinfo(db, file_ids)
    print "Will also remove %d re-attempt entries" % len(reatt_ids)
    tmp = "\n".join(["Re-attempt ID: %d" % xx for xx in reatt_ids])
    utils.print_info(tmp, 1)

    mysqldumpstr = dump_db_entries(db, obs_id, log_ids, file_ids, diag_ids)
    utils.print_info("MySQL dump:\n%s" % mysqldumpstr, 2)

    if not args.dryrun:
        try:
            # Make back-up directory
            oldumask = os.umask(0007)
            os.makedirs(backupdir)
            os.umask(oldumask)
            # Write mysql dump
            with open(os.path.join(backupdir, "db_entries.sql"), 'w') as ff:
                ff.write(mysqldumpstr)
            # Move files
            for src in fns + logfns + diagfns:
                fn = os.path.basename(src)
                dest = os.path.join(backupdir, fn)
                if os.path.isfile(src):
                    # Make sure file exists (it may have already been deleted)
                    shutil.move(src, dest)
            # Remove entries from the database
            with db.transaction() as conn:
                # Remove diagnostic entries
                delete = db.diagnostics.delete().\
                            where(db.diagnostics.c.diagnostic_id.in_(diag_ids))
                results = conn.execute(delete)
                results.close()
                # Remove any quality control entries in the database
                delete = db.qctrl.delete().\
                            where(db.qctrl.c.qctrl_id.in_(qctrl_ids))
                results = conn.execute(delete)
                results.close()
                # Remove obs' 'current_file_id' entry
                update = db.obs.update().\
                            where(db.obs.c.obs_id == obs_id).\
                            values(current_file_id=None)
                results = conn.execute(update)
                results.close()
                # Remove file entries
                # (newest first because of foreign key constraints - parent_file_id column)
                for row in rows:
                    if (row['stage'] == 'grouped') or \
                            ((row['stage'] == 'combined') and (not row['is_deleted'])):
                        # Leave grouped files and undeleted combined files
                        pass
                    else:
                        delete = db.files.delete().\
                                    where(db.files.c.file_id == row['file_id'])
                        results = conn.execute(delete)
                        results.close()
                #
                # Do not delete log entries from the database even though log file was moved
                #
                # Update newest file left to have status new
                update = db.files.update().\
                            where(db.files.c.file_id == max(file_ids_left)).\
                            values(status='new',
                                   note='Data are being reprocessed.',
                                   last_modified=datetime.datetime.now())
                conn.execute(update)

        except:
            print "Error encountered! Will attempt to un-move files."
            # Try to unmove files
            for src in fns + logfns + diagfns:
                fn = os.path.basename(src)
                dest = os.path.join(backupdir, fn)
                if os.path.isfile(dest) and not os.path.isfile(src):
                    shutil.move(dest, src)
            if os.path.isdir(backupdir):
                try:
                    os.remove(os.path.join(backupdir, "db_entries.sql"))
                    os.rmdir(backupdir)
                except:
                    print "Could not remove back-up dir %s" % backupdir
            raise
        else:
            print "Successfully reseted obs ID: %d" % obs_id
Example #20
def main():
    if args.dir_id is not None:
        # Get directory path from database
        dir_toremove = get_dir_from_id(args.dir_id)
        dir_id = args.dir_id
    else:
        dir_toremove = os.path.join(config.base_rawdata_dir, args.dir)
        dir_id = get_id_from_dir(args.dir)
    if not dir_toremove.startswith(config.base_rawdata_dir):
        raise ValueError("Directory to remove (%s) is not in the raw "
                         "data directory (%s)" % 
                         (dir_toremove, config.base_rawdata_dir))

    subdirs = [datetime.datetime.now().strftime("%Y%m%dT%H:%M:%S")]
    tmp = dir_toremove
    while tmp and (os.path.abspath(config.base_rawdata_dir) != os.path.abspath(tmp)):
        tmp, tmp2 = os.path.split(tmp)
        subdirs.append(tmp2)
    subdirs.reverse()
    backupdir = os.path.join(config.output_location, "removed", *subdirs)
    print "Will remove database entries for data in %s" % dir_toremove
    print "Back-ups of existing files will be copied to %s" % backupdir
    
    db = database.Database()

    obs_ids = get_obsinfo(db, dir_id)
    print "Will also remove %d observations" % len(obs_ids)
    tmp = ", ".join(["%d" % xx for xx in obs_ids])
    utils.print_info("Obs IDs: %s" % tmp, 1)

    log_ids, logfns = get_loginfo(db, obs_ids)
    assert len(log_ids) == len(logfns)
    print "Will also remove %d logs" % len(log_ids)
    tmp = "\n".join(["Log ID: %d; %s" % xx for xx in zip(log_ids, logfns)])
    utils.print_info(tmp, 1)

    file_ids, fns = get_fileinfo(db, obs_ids)
    assert len(file_ids) == len(fns)
    print "Will also remove %d files" % len(file_ids)
    tmp = "\n".join(["File ID: %d; %s" % xx for xx in zip(file_ids, fns)])
    utils.print_info(tmp, 1)
    
    diag_ids, diagfns = get_diaginfo(db, file_ids)
    assert len(diag_ids) == len(diagfns)
    print "Will also remove %d diagnostics" % len(diag_ids)
    tmp = "\n".join(["Diagnostic ID: %d; %s" % xx for xx in zip(diag_ids, diagfns)])
    utils.print_info(tmp, 1)
    
    mysqldumpstr = dump_db_entries(db, dir_id, obs_ids, log_ids, file_ids, diag_ids)
    utils.print_info("MySQL dump:\n%s" % mysqldumpstr, 2)
    
    if not args.dryrun:
        try:
            # Make back-up directory
            os.makedirs(backupdir)
            # Write mysql dump
            with open(os.path.join(backupdir, "db_entries.sql"), 'w') as ff:
                ff.write(mysqldumpstr)
            # Move files
            for src in fns+logfns+diagfns:
                fn = os.path.basename(src)
                dest = os.path.join(backupdir, fn)
                if os.path.isfile(src):
                    # Make sure file exists (it may have already been deleted)
                    shutil.move(src, dest)
            # Remove entries from the database
            with db.transaction() as conn:
                # Remove diagnostic entries
                delete = db.diagnostics.delete().\
                            where(db.diagnostics.c.diagnostic_id.in_(diag_ids))
                results = conn.execute(delete)
                results.close()
                # Remove file entries 
                # (newest first because of foreign key constraints - parent_file_id column)
                for file_id in reversed(sorted(file_ids)):
                    delete = db.files.delete().\
                                where(db.files.c.file_id == file_id)
                    results = conn.execute(delete)
                    results.close()
                # logs
                delete = db.logs.delete().\
                            where(db.logs.c.log_id.in_(log_ids))
                results = conn.execute(delete)
                results.close()
                # obs
                delete = db.obs.delete().\
                            where(db.obs.c.obs_id.in_(obs_ids))
                results = conn.execute(delete)
                results.close()
                # directory
                delete = db.directories.delete().\
                            where(db.directories.c.dir_id == dir_id)
                results = conn.execute(delete)
                results.close()
        except:
            print "Error encountered! Will attempt to un-move files."
            # Try to unmove files
            for src in fns+logfns+diagfns:
                fn = os.path.basename(src)
                dest = os.path.join(backupdir, fn)
                if os.path.isfile(dest) and not os.path.isfile(src):
                    shutil.move(dest, src)
            if os.path.isdir(backupdir):
                try:
                    os.rmdir(backupdir)
                except:
                    print "Could not remove back-up dir %s" % backupdir
            raise
        else:
            print "Successfully scrubbed %s (ID: %d)" % (dir_toremove, dir_id)
Example #21
def combine_subints(subdirs, subints, parfn=None, outdir=None):
    """Combine sub-ints from various freq sub-band directories.
        The input lists are as created by
        'group_subband_dirs' or read-in by 'read_listing'.

        Inputs:
            subdirs: List of sub-band directories containing 
                sub-ints to combine
            subints: List of subint files to be combined.
                (NOTE: These are the file names only (i.e. no path).
                    Each file listed should appear in each of the
                    subdirs.)
            parfn: New ephemeris to install when combining subints.
                (Default: Use ephemeris in archive file's header)
            outdir: Directory to output combined file.
                (Default: Current working directory)
        
        Output:
            outfn: The name of the combined file.
    """
    if outdir is None:
        outdir = os.getcwd()
    subints = sorted(subints)
    tmpdir = tempfile.mkdtemp(suffix="_combine", dir=config.tmp_directory)
    devnull = open(os.devnull)
    try:
        cmbsubints = []

        # Try to normalise the archive's parfile
        try:
            if parfn is None:
                arfn = os.path.join(subdirs[0], subints[0])
                normparfn = utils.get_norm_parfile(arfn)
            else:
                normparfn = utils.normalise_parfile(parfn)
        except errors.InputError:
            # No parfile present
            parargs = []
        else:
            parargs = ['-E', normparfn]

        utils.print_info("Adding freq sub-bands for each sub-int...", 2)
        for ii, subint in enumerate(utils.show_progress(subints, width=50)):
            to_combine = [os.path.join(path, subint) for path in subdirs]
            outfn = os.path.join(tmpdir, "combined_%s" % subint)
            cmbsubints.append(outfn)
            utils.execute(['psradd', '-q', '-R', '-o', outfn] + parargs +
                          to_combine,
                          stderr=devnull)
        arf = utils.ArchiveFile(
            os.path.join(tmpdir, "combined_%s" % subints[0]))
        outfn = os.path.join(
            outdir, "%s_%s_%s_%05d_%dsubints.cmb" %
            (arf['name'], arf['band'], arf['yyyymmdd'], arf['secs'],
             len(subints)))
        utils.print_info("Combining %d sub-ints..." % len(cmbsubints), 1)
        utils.execute(['psradd', '-q', '-o', outfn] + cmbsubints,
                      stderr=devnull)
    except:
        raise  # Re-raise the exception
    finally:
        if debug.is_on('reduce'):
            warnings.warn("Not cleaning up temporary directory (%s)" % tmpdir, \
                        errors.CoastGuardWarning)
        else:
            utils.print_info("Removing temporary directory (%s)" % tmpdir, 2)
            shutil.rmtree(tmpdir)
    return outfn
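
# Minimal usage sketch, assuming hypothetical sub-band directories and
# sub-int file names (real lists come from 'group_subband_dirs' or
# 'read_listing'):
if __name__ == '__main__':
    subdirs = ['/data/J1713+0747/band0', '/data/J1713+0747/band1']
    subints = ['subint_0000.ar', 'subint_0001.ar']   # present in every subdir
    cmbfn = combine_subints(subdirs, subints, parfn=None, outdir='/data/cmb')
    print "Combined archive written to %s" % cmbfn
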
def main():
    if args.outfn is not None:
        outfile = open(args.outfn, 'w')
    else:
        outfile = sys.stdout

    if args.interp_method not in ("linear", "nearest", "quadratic", "cubic",
                                  "median"):
        # Assume a file of parameters is provided
        fitfn = args.interp_method
        if os.path.isfile(fitfn):
            args.interp_method = "file"
            fitparams = np.loadtxt(fitfn, unpack=False)
            get_correction = maser_gps_fit_factory(fitparams)
        else:
            raise ValueError("Interpolation method (%s) is not recognized "
                             "nor is it a file of parameters!" %
                             args.interp_method)

    if args.include_clock_offsets:
        clock_offsets = CLOCK_OFFSETS
        clock_mjds = [float(clk[0]) for clk in CLOCK_OFFSETS if np.isfinite(clk[0])] + \
                     [float(clk[1]) for clk in CLOCK_OFFSETS if np.isfinite(clk[1])]
        clock_mjds = sorted(set(clock_mjds))
    else:
        clock_offsets = []
        clock_mjds = []

    end_mjd = args.end_mjd
    if end_mjd is None:
        end_mjd = rsutils.mjdnow()
    mjds = get_mjds(args.start_mjd,
                    end_mjd,
                    args.interval,
                    args.num_per_day,
                    additional=clock_mjds)
    curr = None
    if args.include_clock_offsets:
        outfile.write("# UTC(EFFIX) UTC(GPS)\n")
        outfile.write("# Effelsberg Asterix/PSRix clock correction file\n")
    else:
        outfile.write("# UTC(EFF) UTC(GPS)\n")
        outfile.write("# Effelsberg clock correction file\n")
    outfile.write("# Generated on %s with %s (by P. Lazarus) \n" %
                  (datetime.datetime.now().strftime("%B %d, %Y"), __file__))
    outfile.write("# The following clock offsets are included:\n")
    if clock_offsets:
        for start_mjd, end_mjd, clkoff in clock_offsets:
            outfile.write("#    MJD: %5s to %5s; offset=%g s\n" %
                          (start_mjd, end_mjd, clkoff))
    else:
        outfile.write("#    None\n")
    outfile.write("#\n")

    # Carry over entries from an existing clock file, keeping only those
    # that pre-date the first MJD generated here.
    if args.include:
        with open(args.include, 'r') as inclff:
            for line in inclff:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                mjd = float(line.split()[0])
                if mjd < mjds[0]:
                    outfile.write(line + "\n")

    # Running clock offset (in seconds) added to the maser correction; it is
    # updated as MJDs cross the boundaries listed in clock_offsets. At a
    # boundary MJD the correction line is written twice below (once with the
    # old offset, once with the new one) so the step is explicit in the
    # output clock file.
    clkoff = 0

    for mjd in mjds:
        imjd = int(mjd)
        try:
            if args.interp_method == "file":
                # File containing parameters
                # get_correction is defined above when the fit-file is read
                pass
            else:
                if curr != imjd:
                    utils.print_info(
                        "Getting maser corrections for MJD %05d" % imjd, 1)
                    # Get corrections
                    data = get_maser_data(imjd)
                    if args.interp_method == "median":
                        get_correction = lambda mjd: np.median(data[:, 1])
                    elif args.interp_method in ("linear", "nearest",
                                                "quadratic", "cubic"):
                        get_correction = interpolate.interp1d(
                            data[:, 0], data[:, 1], kind=args.interp_method)
                curr = imjd
            correction = get_correction(mjd)
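            # Presumably fold whole-second jumps in the maser reading back
            # to within half a second (note the sign flip applied on the
            # negative branch below).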
            if correction > 0.5:
                correction -= 1
            elif correction < -0.5:
                correction += 1
                correction = -correction

            if float(mjd) in clock_mjds:
                if clkoff:
                    outfile.write("%.6f\t%.12e # Clock offset: %g s\n" %
                                  (mjd, correction + clkoff, clkoff))
                else:
                    outfile.write("%.6f\t%.12e\n" % (mjd, correction))

            for start_mjd, end_mjd, offset in clock_offsets:
                if start_mjd <= mjd < end_mjd:
                    clkoff = offset
                    break
            if clkoff:
                outfile.write("%.6f\t%.12e # Clock offset: %g s\n" %
                              (mjd, correction + clkoff, clkoff))
            else:
                outfile.write("%.6f\t%.12e\n" % (mjd, correction))
        except NoMaserFileFound:
            outfile.write("# Cannot determine clock correction for MJD %g: " \
                          "No maser file found for MJD %d\n" % (mjd, imjd))
        except NoMaserData:
            outfile.write("# Cannot determine clock correction for MJD %g: " \
                          "No maser data parsed from file for MJD %d\n" % (mjd, imjd))
        except ValueError, exc:
            outfile.write("# Cannot determine clock correction for MJD %g: " \
                          "%s\n" % (mjd, str(exc)))
Exemple #23
0
    def _clean(self, ar):
        #        plot = self.configs.plot
        #        if plot:
        #            import matplotlib.pyplot as plt
        patient = ar.clone()
        patient.pscrunch()
        patient.remove_baseline()

        # Remove profile from dedispersed data
        patient.dedisperse()
        utils.print_info('Loading template', 2)
        data = patient.get_data().squeeze()
        if self.configs.template is None:
            # Sum over all axes except last, which is phase bins
            template = np.apply_over_axes(np.sum, data,
                                          tuple(range(data.ndim -
                                                      1))).squeeze()
            # smooth data
            template = savgol_filter(template, 5, 1)
        else:
            template_ar = psrchive.Archive_load(self.configs.template)
            template_ar.pscrunch()
            template_ar.remove_baseline()
            template_ar.dedisperse()
            if len(template_ar.get_frequencies()) > 1 and len(
                    template_ar.get_frequencies()) < len(
                        patient.get_frequencies()):
                utils.print_info(
                    "Template channel number doesn't match data... f-scrunching!",
                    2)
                template_ar.fscrunch()
            template_data = template_ar.get_data().squeeze()
            template = np.apply_over_axes(np.sum, template_data,
                                          tuple(range(template_data.ndim -
                                                      1))).squeeze()
            # make sure template is 1D
            if len(np.shape(template)) > 1:  # sum over frequencies too
                template_ar.fscrunch()
                utils.print_info(
                    "2D template found. Assuming it has same frequency coverage and channels as data!",
                    2)
                template_phs = np.apply_over_axes(
                    np.sum, template_data,
                    tuple(range(template_data.ndim - 1))).squeeze()
            else:
                template_phs = template

        utils.print_info('Estimating template and profile phase offset', 2)
        if self.configs.template is None:
            phase_offset = 0
        else:
            # Calculate phase offset of template in number of bins, using full obs
            # Get profile data of full obs
            profile = np.apply_over_axes(np.sum, data,
                                         tuple(range(data.ndim -
                                                     1))).squeeze()
            if np.shape(template_phs) != np.shape(profile):
                utils.print_info(
                    'Template and profile have different numbers of phase bins',
                    2)
            #err = (lambda (amp, phs, base): amp*clean_utils.fft_rotate(template_phs, phs) + base - profile)
            err = (lambda amp_phs: amp_phs[0] * clean_utils.fft_rotate(
                template_phs, amp_phs[1]) - profile)
            amp_guess = np.median(profile) / np.median(template_phs)
            phase_guess = -(np.argmax(profile) - np.argmax(template_phs))
            amp_phs_guess = [amp_guess, phase_guess]
            #params, status = leastsq(err, [amp_guess, phase_guess, np.min(profile) - np.min(template_phs)])
            params, status = leastsq(err, amp_phs_guess)
            phase_offset = params[1]
            utils.print_info(
                'Template phase offset = {0}'.format(round(phase_offset, 3)),
                2)

        utils.print_info('Removing profile from patient', 2)
        #        if plot:
        #            preop_patient = patient.clone()
        #            preop_weights = preop_patient.get_weights()
        clean_utils.remove_profile_inplace(patient, template, phase_offset)

        utils.print_info('Accessing weights and applying to patient', 2)
        # re-set DM to 0
        # patient.dededisperse()

        # Get weights
        weights = patient.get_weights()
        # Get data (select first polarization - recall we already P-scrunched)
        data = patient.get_data()[:, 0, :, :]
        data = clean_utils.apply_weights(data, weights)
        #        if plot:
        #            preop_data = preop_patient.get_data()[:,0,:,:]
        #            preop_patient = []  # clear for the sake of memory
        #            preop_data = clean_utils.apply_weights(preop_data, weights)

        # Mask profiles where weight is 0
        mask_2d = np.bitwise_not(np.expand_dims(weights, 2).astype(bool))
        mask_3d = mask_2d.repeat(ar.get_nbin(), axis=2)
        data = np.ma.masked_array(data, mask=mask_3d)
        #        if plot:
        #            preop_data = np.ma.masked_array(preop_data, mask=mask_3d)

        utils.print_info('Masking on-pulse region as determined from template',
                         2)
        # consider residual only in off-pulse region
        if len(np.shape(template)) > 1:  # sum over frequencies
            utils.print_info(
                'Estimating on-pulse region by f-scrunching 2D template', 2)
            template_ar.fscrunch()
            template_1D = np.apply_over_axes(np.sum, template_ar.get_data(),
                                             (0, 1)).squeeze()
        else:
            template_1D = template
        # Rotate template by appropriate amount
        template_rot = clean_utils.fft_rotate(template_1D,
                                              phase_offset).squeeze()
        # masked_template = np.ma.masked_greater(template_rot, np.min(template_rot) + 0.01*np.ptp(template_rot))
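        # Two-pass on-pulse mask: first mask everything above the median to
        # estimate the off-pulse scatter, then mask bins that exceed the
        # median by more than that standard deviation.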
        masked_template = np.ma.masked_greater(template_rot,
                                               np.median(template_rot))
        masked_std = np.ma.std(masked_template)
        # use this std of masked data as cutoff
        masked_template = np.ma.masked_greater(
            template_rot,
            np.median(template_rot) + masked_std)
        #        if plot:
        #            plt.figure(figsize=(10, 5))
        #            plt.subplot(1, 2, 1)
        #            plt.plot(np.apply_over_axes(np.sum, preop_data, tuple(range(data.ndim - 1))).squeeze(), alpha=1)
        #            # Do fit again to scale template
        #            subchan, err, params = clean_utils.remove_profile1d(np.apply_over_axes(np.sum, preop_data, (0, 1)).squeeze(), 0, 0, template_rot, 0, return_params=True)
        #            # plt.plot(params[0]*template_rot + params[1], alpha=0.5)
        #            # plt.plot(params[0]*masked_template + params[1], 'k')
        #            plt.plot(params[0]*template_rot, alpha=0.5)
        #            plt.plot(params[0]*masked_template, 'k')
        #            plt.legend(('Pre-op data', 'Scaled and rotated template', 'Masked template'))
        # Loop through chans and subints to mask on-pulse phase bins
        for ii in range(0, np.shape(data)[0]):
            for jj in range(0, np.shape(data)[1]):
                data.mask[ii, jj, :] = masked_template.mask
        data = np.ma.masked_array(data, mask=data.mask)

        #        if plot:
        #            plt.subplot(1, 2, 2)
        #            plt.plot(np.apply_over_axes(np.ma.sum, data, tuple(range(data.ndim - 1))).squeeze())
        #            plt.title("Residual data")
        #            plt.savefig('data_and_template.png')

        utils.print_info(
            'Calculating robust statistics to determine where RFI removal is required',
            2)
        # RFI-ectomy must be recommended by average of tests
        # BWM: Ok, so this is where the magical stuff actually happens - need to know actually WHAT are the comprehensive stats
        # DJR: At this stage the stats are; (found to work well experimentally)
        #          geometric mean, peak-to-peak, standard deviation, normaltest.
        #      In original coast_guard they were;
        #          mean, peak-to-peak, standard deviation, and max value of FFT
        avg_test_results = clean_utils.comprehensive_stats(data, axis=2, \
                                    chanthresh=self.configs.chanthresh, \
                                    subintthresh=self.configs.subintthresh, \
                                    chan_order=self.configs.chan_order, \
                                    chan_breakpoints=self.configs.chan_breakpoints, \
                                    chan_numpieces=self.configs.chan_numpieces, \
                                    subint_order=self.configs.subint_order, \
                                    subint_breakpoints=self.configs.subint_breakpoints, \
                                    subint_numpieces=self.configs.subint_numpieces, \
                                    )

        utils.print_info('Applying RFI masking weights to archive', 2)
        # Entries of avg_test_results >= 1 mark (subint, channel) profiles that
        # the combined statistics flag as RFI.
        for (isub, ichan) in np.argwhere(avg_test_results >= 1):
            # Be sure to set weights on the original archive, and
            # not the clone we've been working with.
            integ = ar.get_Integration(int(isub))
            integ.set_weight(int(ichan), 0.0)

        freq_fraczap = clean_utils.freq_fraczap(ar)