コード例 #1
0
from ch_util import andata
from ch_util import tools
from ch_util import cal_utils
from ch_util import timing
from ch_util import rfi

from complex_gain import sutil
from complex_gain.temps import TempData

###################################################
# default variables
###################################################

# Directory that contains this module.  Both the default configuration file
# and the fallback log file location are resolved relative to it.
_MODULE_DIR = os.path.dirname(os.path.realpath(__file__))

# Default parameters.  The string handed to load_yaml_config is the path to
# defaults.yaml with ':joint_regression' appended (presumably selecting that
# section of the file -- confirm against load_yaml_config).
_DEFAULTS_SPEC = os.path.join(_MODULE_DIR,
                              'defaults.yaml') + ':joint_regression'
DEFAULTS = NameSpace(load_yaml_config(_DEFAULTS_SPEC))

# Log file location.  The JOINT_REGRESSION_LOG_FILE environment variable takes
# precedence; otherwise joint_regression.log next to this module is used.
LOG_FILE = os.environ.get('JOINT_REGRESSION_LOG_FILE',
                          os.path.join(_MODULE_DIR, 'joint_regression.log'))

DEFAULT_LOGGING = {
    'formatters': {
        'std': {
            'format': "%(asctime)s %(levelname)s %(name)s: %(message)s",
            'datefmt': "%m/%d %H:%M:%S"
        },
    },
    'handlers': {
コード例 #2
0
def main(config_file=None, logging_params=DEFAULT_LOGGING):
    """Jointly regress the delay measurements against a set of features.

    The delay data (the ``'tau'`` dataset of a ``StabilityData`` file) is
    loaded, optionally corrected with the timing data and re-referenced to a
    mezzanine, and then regressed against a feature matrix.  The feature
    matrix is the concatenation of whichever of the noise-source distance,
    temperature, cable monitor, and timing dependences are enabled in the
    configuration.  The fit (or, when ``config.coeff`` is set, the evaluation
    of previously saved coefficients) is repeated once per bootstrap
    realization, and the requested statistics, coefficients, and residuals
    are written to the output directory.

    Parameters
    ----------
    config_file : str, optional
        Path to a YAML configuration file.  When provided, its contents are
        merged on top of a deep copy of ``DEFAULTS``.
    logging_params : dict
        Passed unchanged to ``log.setup_logging``.

    Returns
    -------
    None
        All results are written to disk.
    """

    # Load config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        print(config_file)
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Setup logging
    log.setup_logging(logging_params)
    logger = log.get_logger(__name__)

    timer = Timer(logger)

    # Load data.  Relative filenames are resolved against config.directory.
    sfile = config.data.filename if os.path.isabs(
        config.data.filename) else os.path.join(config.directory,
                                                config.data.filename)
    sdata = StabilityData.from_file(sfile)

    ninput, ntime = sdata['tau'].shape

    # Load temperature data
    tfile = (config.temperature.filename
             if os.path.isabs(config.temperature.filename) else os.path.join(
                 config.directory, config.temperature.filename))

    # These datasets are always requested; config.temperature.load may
    # list additional ones.
    tkeys = ['flag', 'data_flag', 'outlier']
    if config.temperature.load:
        tkeys += config.temperature.load

    tdata = TempData.from_acq_h5(tfile, datasets=tkeys)

    # Query layout database at the median time of the data
    inputmap = tools.get_correlator_inputs(ephemeris.unix_to_datetime(
        np.median(sdata.time[:])),
                                           correlator='chime')

    good_input = np.flatnonzero(np.any(sdata['flags']['tau'][:], axis=-1))
    pol = sutil.get_pol(sdata, inputmap)
    npol = len(pol)

    mezz_index, crate_index = sutil.get_mezz_and_crate(sdata, inputmap)

    # When referencing to a mezzanine, the phase reference for each entry of
    # pol is the set of inputs in that entry whose mezzanine index equals the
    # configured value.  Otherwise use the phase reference from the config.
    if config.mezz_ref.enable:
        phase_ref = [
            ipol[mezz_index[ipol] == iref]
            for ipol, iref in zip(pol, config.mezz_ref.mezz)
        ]
    else:
        phase_ref = config.data.phase_ref

    # Load timing
    if config.timing.enable:

        # Extract filenames from config.  The high-pass and low-pass filtered
        # versions are expected in 'hpf' and 'lpf' sub-directories next to
        # each timing file, under the same basename.
        timing_files = [
            tf if os.path.isabs(tf) else os.path.join(config.directory, tf)
            for tf in config.timing.files
        ]
        timing_files_hpf = [
            os.path.join(os.path.dirname(tf), 'hpf', os.path.basename(tf))
            for tf in timing_files
        ]
        timing_files_lpf = [
            os.path.join(os.path.dirname(tf), 'lpf', os.path.basename(tf))
            for tf in timing_files
        ]

        # If requested, add the timing data back into the delay data
        if config.timing.add.enable:

            timer.start("Adding timing data to delay measurements.")

            ns_tau, _, ns_flag, ns_inputs = sutil.get_timing_correction(
                sdata, timing_files, **config.timing.add.kwargs)

            index = timing.map_input_to_noise_source(sdata.index_map['input'],
                                                     ns_inputs)

            timing_tau = ns_tau[index, :]
            timing_flag = ns_flag[index, :]

            # Reference the timing delay to the same phase reference input
            # as the data, and require the reference to be flagged good.
            for ipol, iref in zip(pol, config.data.phase_ref):
                timing_tau[ipol, :] = timing_tau[ipol, :] - timing_tau[
                    iref, np.newaxis, :]
                timing_flag[ipol, :] = timing_flag[ipol, :] & timing_flag[
                    iref, np.newaxis, :]

            sdata['tau'][:] = sdata['tau'][:] + timing_tau
            sdata['flags']['tau'][:] = sdata['flags']['tau'][:] & timing_flag

            timer.stop()

        # Extract the dependent variables from the timing dataset
        timer.start("Calculating timing dependence.")

        # Choose which timing files are used for the delay.  When sep_delay
        # is set, the HPF and LPF files are fit as two separate feature sets.
        if config.timing.sep_delay:
            logger.info("Fitting HPF and LPF timing correction separately.")
            files = timing_files_hpf
            files2 = timing_files_lpf
        else:
            files2 = None
            if config.timing.hpf_delay:
                logger.info("Using HPF timing correction for delay.")
                files = timing_files_hpf
            elif config.timing.lpf_delay:
                logger.info("Using LPF timing correction for delay.")
                files = timing_files_lpf
            else:
                logger.info("Using full timing correction for delay.")
                files = timing_files

        # Choose which timing files are used for the amplitude
        kwargs = {}
        if config.timing.lpf_amp:
            logger.info("Using LPF timing correction for amplitude.")
            kwargs['afiles'] = timing_files_lpf
        elif config.timing.hpf_amp:
            logger.info("Using HPF timing correction for amplitude.")
            kwargs['afiles'] = timing_files_hpf
        else:
            logger.info("Using full timing correction for amplitude.")
            kwargs['afiles'] = timing_files

        # Forward optional settings only if they are present in the config
        for key in ['ns_ref', 'inter_cmn', 'fit_amp', 'ref_amp', 'cmn_amp']:
            if key in config.timing:
                kwargs[key] = config.timing[key]

        xtiming, xtiming_flag, xtiming_group = sutil.timing_dependence(
            sdata, files, inputmap, **kwargs)

        if files2 is not None:
            logger.info("Calculating second timing dependence.")
            # The amplitude fit is disabled for the second set of files.
            kwargs['fit_amp'] = False
            xtiming2, xtiming2_flag, xtiming2_group = sutil.timing_dependence(
                sdata, files2, inputmap, **kwargs)

            xtiming = np.concatenate((xtiming, xtiming2), axis=-1)
            xtiming_flag = np.concatenate((xtiming_flag, xtiming2_flag),
                                          axis=-1)
            xtiming_group = np.concatenate((xtiming_group, xtiming2_group),
                                           axis=-1)

        timer.stop()

    else:
        xtiming = None
        xtiming_flag = None
        xtiming_group = None

    # Reference delay data to mezzanine
    if config.mezz_ref.enable:

        timer.start("Referencing delay measurements to mezzanine.")

        for ipol, iref in zip(pol, config.mezz_ref.mezz):

            this_mezz = ipol[mezz_index[ipol] == iref]

            # Use the flags as weights to form the flag-weighted average
            # delay over the reference inputs at each time.
            wmezz = sdata['flags']['tau'][this_mezz, :].astype(np.float32)

            norm = np.sum(wmezz, axis=0)

            taut_mezz = np.sum(wmezz * sdata['tau'][this_mezz, :],
                               axis=0) * tools.invert_no_zero(norm)

            # Times where no reference input was flagged good are rejected.
            flagt_mezz = norm > 0.0

            sdata['tau'][
                ipol, :] = sdata['tau'][ipol, :] - taut_mezz[np.newaxis, :]
            sdata['flags']['tau'][ipol, :] = sdata['flags']['tau'][
                ipol, :] & flagt_mezz[np.newaxis, :]

        timer.stop()

    # Load cable monitor
    if config.cable_monitor.enable:

        timer.start("Calculating cable monitor dependence.")

        cbl = timing.TimingCorrection.from_acq_h5(
            config.cable_monitor.filename)

        kwargs = {'include_diff': config.cable_monitor.include_diff}

        xcable, xcable_flag, xcable_group = sutil.cable_monitor_dependence(
            sdata, cbl, **kwargs)

        timer.stop()

    else:
        xcable = None
        xcable_flag = None
        xcable_group = None

    # Load NS distance
    if config.ns_distance.enable:

        timer.start("Calculating NS distance dependence.")

        kwargs = {}
        kwargs['phase_ref'] = phase_ref

        # Forward optional settings only if they are present in the config
        for key in [
                'sensor', 'temp_field', 'sep_cyl', 'sep_feed',
                'include_offset', 'include_ha'
        ]:
            if key in config.ns_distance:
                kwargs[key] = config.ns_distance[key]

        # The distance dependence is derived either from the cable monitor
        # data or from the temperature data.
        if config.ns_distance.use_cable_monitor:
            kwargs['is_cable_monitor'] = True
            kwargs['use_alpha'] = config.ns_distance.use_alpha
            nsx = timing.TimingCorrection.from_acq_h5(
                config.cable_monitor.filename)
        else:
            kwargs['is_cable_monitor'] = False
            nsx = tdata

        xdist, xdist_flag, xdist_group = sutil.ns_distance_dependence(
            sdata, nsx, inputmap, **kwargs)

        if (config.ns_distance.deriv
                is not None) and (config.ns_distance.deriv > 0):

            for dd in range(1, config.ns_distance.deriv + 1):

                # NOTE(review): the derivative is always computed from tdata,
                # even when nsx was loaded from the cable monitor above --
                # confirm this is intended.
                d_xdist, d_xdist_flag, d_xdist_group = sutil.ns_distance_dependence(
                    sdata, tdata, inputmap, deriv=dd, **kwargs)

                # Only the feature at index 1 of the derivative result is
                # appended to the distance features.
                tind = np.atleast_1d(1)
                xdist = np.concatenate((xdist, d_xdist[:, :, tind]), axis=-1)
                xdist_flag = np.concatenate(
                    (xdist_flag, d_xdist_flag[:, :, tind]), axis=-1)
                xdist_group = np.concatenate(
                    (xdist_group, d_xdist_group[:, tind]), axis=-1)

        timer.stop()

    else:
        xdist = None
        xdist_flag = None
        xdist_group = None

    # Load temperatures
    if config.temperature.enable:

        timer.start("Calculating temperature dependence.")

        xtemp, xtemp_flag, xtemp_group, xtemp_name = sutil.temperature_dependence(
            sdata,
            tdata,
            config.temperature.sensor,
            field=config.temperature.temp_field,
            inputmap=inputmap,
            phase_ref=phase_ref,
            check_hut=config.temperature.check_hut)

        if (config.temperature.deriv
                is not None) and (config.temperature.deriv > 0):

            # Append the derivatives up to the requested order as
            # additional features.
            for dd in range(1, config.temperature.deriv + 1):

                d_xtemp, d_xtemp_flag, d_xtemp_group, d_xtemp_name = sutil.temperature_dependence(
                    sdata,
                    tdata,
                    config.temperature.sensor,
                    field=config.temperature.temp_field,
                    deriv=dd,
                    inputmap=inputmap,
                    phase_ref=phase_ref,
                    check_hut=config.temperature.check_hut)

                xtemp = np.concatenate((xtemp, d_xtemp), axis=-1)
                xtemp_flag = np.concatenate((xtemp_flag, d_xtemp_flag),
                                            axis=-1)
                xtemp_group = np.concatenate((xtemp_group, d_xtemp_group),
                                             axis=-1)
                xtemp_name += d_xtemp_name

        timer.stop()

    else:
        xtemp = None
        xtemp_flag = None
        xtemp_group = None
        xtemp_name = None

    # Combine into single feature matrix
    x, coeff_name = _concatenate(xdist,
                                 xtemp,
                                 xcable,
                                 xtiming,
                                 name_xtemp=xtemp_name)

    x_group, _ = _concatenate(xdist_group, xtemp_group, xcable_group,
                              xtiming_group)

    # A sample is only used if every feature and the delay itself are
    # flagged good.
    x_flag, _ = _concatenate(xdist_flag, xtemp_flag, xcable_flag, xtiming_flag)
    x_flag = np.all(x_flag, axis=-1) & sdata.flags['tau'][:]

    nfeature = x.shape[-1]

    logger.info("Fitting %d features." % nfeature)

    # Save data (after the optional timing and mezzanine adjustments above)
    if config.preliminary_save.enable:

        if config.preliminary_save.filename is not None:
            ofile = (config.preliminary_save.filename if os.path.isabs(
                config.preliminary_save.filename) else os.path.join(
                    config.directory, config.preliminary_save.filename))
        else:
            ofile = os.path.splitext(
                sfile)[0] + '_%s.h5' % config.preliminary_save.suffix

        sdata.save(ofile, mode='w')

    # Subtract mean
    if config.mean_subtract:
        timer.start("Subtracting mean value.")

        tau, mu_tau, mu_tau_flag = sutil.mean_subtract(sdata,
                                                       sdata['tau'][:],
                                                       x_flag,
                                                       use_calibrator=True)

        mu_x = np.zeros(mu_tau.shape + (nfeature, ), dtype=x.dtype)
        mu_x_flag = np.zeros(mu_tau.shape + (nfeature, ), dtype=bool)
        x_no_mu = x.copy()

        # Apply the same mean subtraction to every feature individually
        for ff in range(nfeature):
            x_no_mu[..., ff], mu_x[..., ff], mu_x_flag[..., ff] = (
                sutil.mean_subtract(sdata,
                                    x[:, :, ff],
                                    x_flag,
                                    use_calibrator=True))
        timer.stop()

    else:
        x_no_mu = x.copy()
        tau = sdata['tau'][:].copy()

    # Calculate unique days.  bmap gives, for every time sample, the index
    # of its CSD within csd_uniq.
    csd_uniq, bmap = np.unique(sdata['csd'][:], return_inverse=True)
    ncsd = csd_uniq.size

    # Prepare unique sources: label each sample as '<calibrator>/<source>'
    classification = np.char.add(np.char.add(sdata['calibrator'][:], '/'),
                                 sdata['source'][:])

    # If requested, load existing coefficients
    if config.coeff is not None:
        coeff = andata.BaseData.from_acq_h5(config.coeff)
        evaluate_only = True
    else:
        evaluate_only = False

    # If requested, set up boot strapping
    if config.bootstrap.enable:

        # Draw either whole days (by_transit) or individual time samples
        nboot = config.bootstrap.number
        nchoices = ncsd if config.bootstrap.by_transit else ntime
        nsample = int(config.bootstrap.fraction * nchoices)

        bindex = np.zeros((nboot, nsample), dtype=int)
        for roll in range(nboot):
            bindex[roll, :] = np.sort(
                np.random.choice(nchoices,
                                 size=nsample,
                                 replace=config.bootstrap.replace))

    else:

        # A single pass over all of the time samples
        nboot = 1
        bindex = np.arange(ntime, dtype=int)[np.newaxis, :]

    # Prepare output
    if config.output.directory is not None:
        output_dir = config.output.directory
    else:
        # NOTE(review): every other path in this function is resolved
        # against config.directory -- confirm config.data.directory exists.
        output_dir = config.data.directory

    if config.output.suffix is not None:
        output_suffix = config.output.suffix
    else:
        output_suffix = os.path.splitext(os.path.basename(
            config.data.filename))[0]

    # Perform joint fit
    for bb, bind in enumerate(bindex):

        # Translate the bootstrap selection into indices along the time axis
        if config.bootstrap.enable and config.bootstrap.by_transit:
            tind = np.concatenate(
                tuple([np.flatnonzero(bmap == ii) for ii in bind]))
        else:
            tind = bind

        ntime = tind.size

        if config.jackknife.enable:
            # Values <= 1.0 are interpreted as a fraction of the number of
            # unique days; larger values are used as-is.
            start = int(
                config.jackknife.start * ncsd
            ) if config.jackknife.start <= 1.0 else config.jackknife.start
            end = int(
                config.jackknife.end *
                ncsd) if config.jackknife.end <= 1.0 else config.jackknife.end

            # NOTE(review): this mask has the length of the full time axis
            # (bmap), whereas the masks below have length tind.size.  The two
            # differ whenever the bootstrap selection changes the number of
            # samples -- confirm jackknife and bootstrap are never combined.
            time_flag_fit = (bmap >= start) & (bmap < end)

            if config.jackknife.restrict_stat:
                time_flag_stat = np.logical_not(time_flag_fit)
            else:
                time_flag_stat = np.ones(ntime, dtype=bool)

        else:
            time_flag_fit = np.ones(ntime, dtype=bool)
            time_flag_stat = np.ones(ntime, dtype=bool)

        logger.info(
            "Fitting data between %s (CSD %d) and %s (CSD %d)" %
            (ephemeris.unix_to_datetime(np.min(
                sdata.time[tind[time_flag_fit]])).strftime("%Y-%m-%d"),
             np.min(sdata['csd'][:][tind[time_flag_fit]]),
             ephemeris.unix_to_datetime(np.max(
                 sdata.time[tind[time_flag_fit]])).strftime("%Y-%m-%d"),
             np.max(sdata['csd'][:][tind[time_flag_fit]])))

        logger.info(
            "Calculating statistics from data between %s (CSD %d) and %s (CSD %d)"
            % (ephemeris.unix_to_datetime(
                np.min(sdata.time[tind[time_flag_stat]])).strftime("%Y-%m-%d"),
               np.min(sdata['csd'][:][tind[time_flag_stat]]),
               ephemeris.unix_to_datetime(
                   np.max(
                       sdata.time[tind[time_flag_stat]])).strftime("%Y-%m-%d"),
               np.max(sdata['csd'][:][tind[time_flag_stat]])))

        if evaluate_only:
            # Apply the coefficients loaded from config.coeff; no fit is run.
            timer.start("Evaluating coefficients provided.")
            fitter = sutil.JointTempEvaluation(
                x_no_mu[:, tind, :],
                tau[:, tind],
                coeff['coeff'][:],
                flag=x_flag[:, tind],
                coeff_name=coeff.index_map['feature'][:],
                feature_name=coeff_name,
                intercept=coeff['intercept'][:],
                intercept_name=coeff.index_map['classification'][:],
                classification=classification[tind])
            timer.stop()

        else:
            timer.start("Setting up fit.  Bootstrap %d of %d." %
                        (bb + 1, nboot))

            fitter = sutil.JointTempRegression(
                x_no_mu[:, tind, :],
                tau[:, tind],
                x_group,
                flag=x_flag[:, tind],
                classification=classification[tind],
                coeff_name=coeff_name)
            timer.stop()

            timer.start("Performing fit.  Bootstrap %d of %d." %
                        (bb + 1, nboot))
            fitter.fit_temp(time_flag=time_flag_fit, **config.fit_options)
            timer.stop()

        # If bootstrapping, append counter to filename and record which
        # samples were drawn for this realization
        if config.bootstrap.enable:
            output_suffix_bb = output_suffix + "_bootstrap_%04d" % (
                config.bootstrap.index_start + bb, )

            with open(
                    os.path.join(output_dir,
                                 "bootstrap_index_%s.json" % output_suffix_bb),
                    'w') as jhandler:
                json.dump({
                    "bind": bind.tolist(),
                    "tind": tind.tolist()
                }, jhandler)

        else:
            output_suffix_bb = output_suffix

        # Save statistics to file
        if config.output.stat:

            # If requested, break the model up into its various components for calculating statistics
            stat_key = ['data', 'model', 'resid']
            if config.refine_model.enable:
                stat_add = fitter.refine_model(config.refine_model.include)
                stat_key += stat_add

            # Redefine axes: build a container restricted to the selected
            # time samples
            bdata = StabilityData()
            for dset in ["source", "csd", "calibrator", "calibrator_time"]:
                bdata.create_dataset(dset, data=sdata[dset][tind])

            bdata.create_index_map("time", sdata.index_map["time"][tind])
            bdata.create_index_map("input", sdata.index_map["input"][:])
            bdata.attrs["calibrator"] = sdata.attrs.get("calibrator", "CYG_A")

            # Calculate statistics for every combination of statistic,
            # fitter attribute, and reference
            stat = {}
            for statistic in ['std', 'mad']:
                for attr in stat_key:
                    for ref, ref_common in zip(['mezz', 'cmn'], [False, True]):
                        stat[(statistic, attr, ref)] = sutil.short_long_stat(
                            bdata,
                            getattr(fitter, attr),
                            fitter._flag & time_flag_stat[np.newaxis, :],
                            stat=statistic,
                            ref_common=ref_common,
                            pol=pol)

            output_filename = os.path.join(output_dir,
                                           "stat_%s.h5" % output_suffix_bb)

            write_stat(bdata, stat, fitter, output_filename)

        # Save coefficients to file
        if config.output.coeff:
            output_filename = os.path.join(output_dir,
                                           "coeff_%s.h5" % output_suffix_bb)

            write_coeff(sdata, fitter, output_filename)

        # Save residuals to file
        if config.output.resid:
            output_filename = os.path.join(output_dir,
                                           "resid_%s.h5" % output_suffix_bb)

            write_resid(sdata, fitter, output_filename)

        # Release the fitter before the next realization allocates a new one
        del fitter
        gc.collect()
コード例 #3
0
def main(config_file=None, logging_params=DEFAULT_LOGGING):

    # Load config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Setup logging
    log.setup_logging(logging_params)
    logger = log.get_logger(__name__)

    ## Load data for flagging
    # Load fpga restarts
    time_fpga_restart = []
    if config.fpga_restart_file is not None:

        with open(config.fpga_restart_file, 'r') as handler:
            for line in handler:
                time_fpga_restart.append(
                    ephemeris.datetime_to_unix(
                        ephemeris.timestr_to_datetime(line.split('_')[0])))

    time_fpga_restart = np.array(time_fpga_restart)

    # Load housekeeping flag
    if config.housekeeping_file is not None:
        ftemp = TempData.from_acq_h5(config.housekeeping_file,
                                     datasets=["time_flag"])
    else:
        ftemp = None

    # Load jump data
    if config.jump_file is not None:
        with h5py.File(config.jump_file, 'r') as handler:
            jump_time = handler["time"][:]
            jump_size = handler["jump_size"][:]
    else:
        jump_time = None
        jump_size = None

    # Load rain data
    if config.rain_file is not None:
        with h5py.File(config.rain_file, 'r') as handler:
            rain_ranges = handler["time_range_conservative"][:]
    else:
        rain_ranges = []

    # Load data flags
    data_flags = {}
    if config.data_flags:
        finder.connect_database()
        flag_types = finder.DataFlagType.select()
        possible_data_flags = []
        for ft in flag_types:
            possible_data_flags.append(ft.name)
            if ft.name in config.data_flags:
                new_data_flags = finder.DataFlag.select().where(
                    finder.DataFlag.type == ft)
                data_flags[ft.name] = list(new_data_flags)

    # Set desired range of time
    start_time = (ephemeris.datetime_to_unix(
        datetime.datetime(
            *config.start_date)) if config.start_date is not None else None)
    end_time = (ephemeris.datetime_to_unix(datetime.datetime(
        *config.end_date)) if config.end_date is not None else None)

    ## Find gain files
    files = {}
    for src in config.sources:
        files[src] = sorted(
            glob.glob(
                os.path.join(config.directory, src.lower(),
                             "%s_%s_lsd_*.h5" % (
                                 config.prefix,
                                 src.lower(),
                             ))))
    csd = {}
    for src in config.sources:
        csd[src] = np.array(
            [int(os.path.splitext(ff)[0][-4:]) for ff in files[src]])

    for src in config.sources:
        logger.info("%s:  %d files" % (src, len(csd[src])))

    ## Remove files that occur during flag
    csd_flag = {}
    for src in config.sources:

        body = ephemeris.source_dictionary[src]

        csd_flag[src] = np.ones(csd[src].size, dtype=np.bool)

        for ii, cc in enumerate(csd[src][:]):

            ttrans = ephemeris.transit_times(body,
                                             ephemeris.csd_to_unix(cc))[0]

            if (start_time is not None) and (ttrans < start_time):
                csd_flag[src][ii] = False
                continue

            if (end_time is not None) and (ttrans > end_time):
                csd_flag[src][ii] = False
                continue

            # If requested, remove daytime transits
            if not config.include_daytime.get(
                    src, config.include_daytime.default) and daytime_flag(
                        ttrans)[0]:
                logger.info("%s CSD %d:  daytime transit" % (src, cc))
                csd_flag[src][ii] = False
                continue

            # Remove transits during HKP drop out
            if ftemp is not None:
                itemp = np.flatnonzero(
                    (ftemp.time[:] >= (ttrans - config.transit_window))
                    & (ftemp.time[:] <= (ttrans + config.transit_window)))
                tempflg = ftemp['time_flag'][itemp]
                if (tempflg.size == 0) or ((np.sum(tempflg, dtype=np.float32) /
                                            float(tempflg.size)) < 0.50):
                    logger.info("%s CSD %d:  no housekeeping" % (src, cc))
                    csd_flag[src][ii] = False
                    continue

            # Remove transits near jumps
            if jump_time is not None:
                njump = np.sum((jump_size > config.min_jump_size)
                               & (jump_time > (ttrans - config.jump_window))
                               & (jump_time < ttrans))
                if njump > config.max_njump:
                    logger.info("%s CSD %d:  %d jumps before" %
                                (src, cc, njump))
                    csd_flag[src][ii] = False
                    continue

            # Remove transits near rain
            for rng in rain_ranges:
                if (((ttrans - config.transit_window) <= rng[1])
                        and ((ttrans + config.transit_window) >= rng[0])):

                    logger.info("%s CSD %d:  during rain" % (src, cc))
                    csd_flag[src][ii] = False
                    break

            # Remove transits during data flag
            for name, flag_list in data_flags.items():

                if csd_flag[src][ii]:

                    for flg in flag_list:

                        if (((ttrans - config.transit_window) <=
                             flg.finish_time)
                                and ((ttrans + config.transit_window) >=
                                     flg.start_time)):

                            logger.info("%s CSD %d:  %s flag" %
                                        (src, cc, name))
                            csd_flag[src][ii] = False
                            break

    # Print number of files left after flagging
    for src in config.sources:
        logger.info("%s:  %d files (after flagging)" %
                    (src, np.sum(csd_flag[src])))

    ## Construct pair wise differences
    npair = len(config.diff_pair)
    shift = [nd * 24.0 * 3600.0 for nd in config.nday_shift]

    calmap = []
    calpair = []

    for (tsrc, csrc), sh in zip(config.diff_pair, shift):

        body_test = ephemeris.source_dictionary[tsrc]
        body_cal = ephemeris.source_dictionary[csrc]

        for ii, cc in enumerate(csd[tsrc]):

            if csd_flag[tsrc][ii]:

                test_transit = ephemeris.transit_times(
                    body_test, ephemeris.csd_to_unix(cc))[0]
                cal_transit = ephemeris.transit_times(body_cal,
                                                      test_transit + sh)[0]
                cal_csd = int(np.fix(ephemeris.unix_to_csd(cal_transit)))

                ttrans = np.sort([test_transit, cal_transit])

                if cal_csd in csd[csrc]:
                    jj = list(csd[csrc]).index(cal_csd)

                    if csd_flag[csrc][jj] and not np.any(
                        (time_fpga_restart >= ttrans[0])
                            & (time_fpga_restart <= ttrans[1])):
                        calmap.append([ii, jj])
                        calpair.append([tsrc, csrc])

    calmap = np.array(calmap)
    calpair = np.array(calpair)

    ntransit = calmap.shape[0]

    logger.info("%d total transit pairs" % ntransit)
    for ii in range(ntransit):

        t1 = ephemeris.transit_times(
            ephemeris.source_dictionary[calpair[ii, 0]],
            ephemeris.csd_to_unix(csd[calpair[ii, 0]][calmap[ii, 0]]))[0]
        t2 = ephemeris.transit_times(
            ephemeris.source_dictionary[calpair[ii, 1]],
            ephemeris.csd_to_unix(csd[calpair[ii, 1]][calmap[ii, 1]]))[0]

        logger.info("%s (%d) - %s (%d):  %0.1f hr" %
                    (calpair[ii, 0], csd_flag[calpair[ii, 0]][calmap[ii, 0]],
                     calpair[ii, 1], csd_flag[calpair[ii, 1]][calmap[ii, 1]],
                     (t1 - t2) / 3600.0))

    # Determine unique diff pairs
    diff_name = np.array(['%s/%s' % tuple(cp) for cp in calpair])
    uniq_diff, lbl_diff, cnt_diff = np.unique(diff_name,
                                              return_inverse=True,
                                              return_counts=True)
    ndiff = uniq_diff.size

    for ud, udcnt in zip(uniq_diff, cnt_diff):
        logger.info("%s:  %d transit pairs" % (ud, udcnt))

    ## Load gains
    inputmap = tools.get_correlator_inputs(datetime.datetime.utcnow(),
                                           correlator='chime')
    ninput = len(inputmap)
    nfreq = 1024

    # Set up gain arrays
    gain = np.zeros((2, nfreq, ninput, ntransit), dtype=np.complex64)
    weight = np.zeros((2, nfreq, ninput, ntransit), dtype=np.float32)
    input_sort = np.zeros((2, ninput, ntransit), dtype=np.int)

    kcsd = np.zeros((2, ntransit), dtype=np.float32)
    timestamp = np.zeros((2, ntransit), dtype=np.float64)
    is_daytime = np.zeros((2, ntransit), dtype=np.bool)

    for tt in range(ntransit):

        for kk, (src, ind) in enumerate(zip(calpair[tt], calmap[tt])):

            body = ephemeris.source_dictionary[src]
            filename = files[src][ind]

            logger.info("%s:  %s" % (src, filename))

            temp = containers.StaticGainData.from_file(filename)

            freq = temp.freq[:]
            inputs = temp.input[:]

            isort = reorder_inputs(inputmap, inputs)
            inputs = inputs[isort]

            gain[kk, :, :, tt] = temp.gain[:, isort]
            weight[kk, :, :, tt] = temp.weight[:, isort]
            input_sort[kk, :, tt] = isort

            kcsd[kk, tt] = temp.attrs['lsd']
            timestamp[kk, tt] = ephemeris.transit_times(
                body, ephemeris.csd_to_unix(kcsd[kk, tt]))[0]
            is_daytime[kk, tt] = daytime_flag(timestamp[kk, tt])[0]

            if np.any(isort != np.arange(isort.size)):
                logger.info("Input ordering has changed: %s" %
                            ephemeris.unix_to_datetime(
                                timestamp[kk, tt]).strftime("%Y-%m-%d"))

        logger.info("")

    inputs = np.array([(inp.id, inp.input_sn) for inp in inputmap],
                      dtype=[('chan_id', 'u2'), ('correlator_input', 'S32')])

    ## Load input flags
    inpflg = np.ones((2, ninput, ntransit), dtype=np.bool)

    min_flag_time = np.min(timestamp) - 7.0 * 24.0 * 60.0 * 60.0
    max_flag_time = np.max(timestamp) + 7.0 * 24.0 * 60.0 * 60.0

    flaginput_files = sorted(
        glob.glob(
            os.path.join(config.flaginput_dir, "*" + config.flaginput_suffix,
                         "*.h5")))

    if flaginput_files:
        logger.info("Found %d flaginput files." % len(flaginput_files))
        tmp = andata.FlagInputData.from_acq_h5(flaginput_files, datasets=())
        start, stop = [
            int(yy) for yy in np.percentile(
                np.flatnonzero((tmp.time[:] >= min_flag_time)
                               & (tmp.time[:] <= max_flag_time)), [0, 100])
        ]

        cont = andata.FlagInputData.from_acq_h5(flaginput_files,
                                                start=start,
                                                stop=stop,
                                                datasets=['flag'])

        for kk in range(2):
            inpflg[kk, :, :] = cont.resample('flag',
                                             timestamp[kk],
                                             transpose=True)

            logger.info("Flaginput time offsets in minutes (pair %d):" % kk)
            logger.info(
                str(
                    np.fix((cont.time[cont.search_update_time(timestamp[kk])] -
                            timestamp[kk]) / 60.0).astype(np.int)))

    # Sort flags so they are in same order
    for tt in range(ntransit):
        for kk in range(2):
            inpflg[kk, :, tt] = inpflg[kk, input_sort[kk, :, tt], tt]

    # Do not apply input flag to phase reference
    for ii in config.index_phase_ref:
        inpflg[:, ii, :] = True

    ## Flag out gains with high uncertainty and frequencies with large fraction of data flagged
    frac_err = tools.invert_no_zero(np.sqrt(weight) * np.abs(gain))

    flag = np.all((weight > 0.0) & (np.abs(gain) > 0.0) &
                  (frac_err < config.max_uncertainty),
                  axis=0)

    freq_flag = ((np.sum(flag, axis=(1, 2), dtype=np.float32) /
                  float(np.prod(flag.shape[1:]))) > config.freq_threshold)

    if config.apply_rfi_mask:
        freq_flag &= np.logical_not(rfi.frequency_mask(freq))

    flag = flag & freq_flag[:, np.newaxis, np.newaxis]

    good_freq = np.flatnonzero(freq_flag)

    logger.info("Number good frequencies %d" % good_freq.size)

    ## Generate flags with more conservative cuts on frequency
    c_flag = flag & np.all(frac_err < config.conservative.max_uncertainty,
                           axis=0)

    c_freq_flag = ((np.sum(c_flag, axis=(1, 2), dtype=np.float32) /
                    float(np.prod(c_flag.shape[1:]))) >
                   config.conservative.freq_threshold)

    if config.conservative.apply_rfi_mask:
        c_freq_flag &= np.logical_not(rfi.frequency_mask(freq))

    c_flag = c_flag & c_freq_flag[:, np.newaxis, np.newaxis]

    c_good_freq = np.flatnonzero(c_freq_flag)

    logger.info("Number good frequencies (conservative thresholds) %d" %
                c_good_freq.size)

    ## Apply input flags
    flag &= np.all(inpflg[:, np.newaxis, :, :], axis=0)

    ## Update flags based on beam flag
    if config.beam_flag_file is not None:

        dbeam = andata.BaseData.from_acq_h5(config.beam_flag_file)

        db_csd = np.floor(ephemeris.unix_to_csd(dbeam.index_map['time'][:]))

        for ii, name in enumerate(config.beam_flag_datasets):
            logger.info("Applying %s beam flag." % name)
            if not ii:
                db_flag = dbeam.flags[name][:]
            else:
                db_flag &= dbeam.flags[name][:]

        cnt = 0
        for ii, dbc in enumerate(db_csd):

            this_csd = np.flatnonzero(np.any(kcsd == dbc, axis=0))

            if this_csd.size > 0:

                logger.info("Beam flag for %d matches %s." %
                            (dbc, str(kcsd[:, this_csd])))

                flag[:, :, this_csd] &= db_flag[np.newaxis, :, ii, np.newaxis]

                cnt += 1

        logger.info("Applied %0.1f percent of the beam flags" %
                    (100.0 * cnt / float(db_csd.size), ))

    ## Flag inputs with large amount of missing data
    input_frac_flagged = (
        np.sum(flag[good_freq, :, :], axis=(0, 2), dtype=np.float32) /
        float(good_freq.size * ntransit))
    input_flag = input_frac_flagged > config.input_threshold

    for ii in config.index_phase_ref:
        logger.info("Phase reference %d has %0.3f fraction of data flagged." %
                    (ii, input_frac_flagged[ii]))
        input_flag[ii] = True

    good_input = np.flatnonzero(input_flag)

    flag = flag & input_flag[np.newaxis, :, np.newaxis]

    logger.info("Number good inputs %d" % good_input.size)

    ## Calibrate
    gaincal = gain[0] * tools.invert_no_zero(gain[1])

    frac_err_cal = np.sqrt(frac_err[0]**2 + frac_err[1]**2)

    count = np.sum(flag, axis=-1, dtype=np.int)
    stat_flag = count > config.min_num_transit

    ## Calculate phase
    amp = np.abs(gaincal)
    phi = np.angle(gaincal)

    ## Calculate polarisation groups
    pol_dict = {'E': 'X', 'S': 'Y'}
    cyl_dict = {2: 'A', 3: 'B', 4: 'C', 5: 'D'}

    if config.group_by_cyl:
        group_id = [
            (inp.pol,
             inp.cyl) if tools.is_chime(inp) and (ii in good_input) else None
            for ii, inp in enumerate(inputmap)
        ]
    else:
        group_id = [
            inp.pol if tools.is_chime(inp) and (ii in good_input) else None
            for ii, inp in enumerate(inputmap)
        ]

    ugroup_id = sorted([uidd for uidd in set(group_id) if uidd is not None])
    ngroup = len(ugroup_id)

    group_list_noref = [
        np.array([
            gg for gg, gid in enumerate(group_id)
            if (gid == ugid) and gg not in config.index_phase_ref
        ]) for ugid in ugroup_id
    ]

    group_list = [
        np.array([gg for gg, gid in enumerate(group_id) if gid == ugid])
        for ugid in ugroup_id
    ]

    if config.group_by_cyl:
        group_str = [
            "%s-%s" % (pol_dict[pol], cyl_dict[cyl]) for pol, cyl in ugroup_id
        ]
    else:
        group_str = [pol_dict[pol] for pol in ugroup_id]

    index_phase_ref = []
    for gstr, igroup in zip(group_str, group_list):
        candidate = [ii for ii in config.index_phase_ref if ii in igroup]
        if len(candidate) != 1:
            index_phase_ref.append(None)
        else:
            index_phase_ref.append(candidate[0])

    logger.info(
        "Phase reference: %s" %
        ', '.join(['%s = %s' % tpl
                   for tpl in zip(group_str, index_phase_ref)]))

    ## Apply thermal correction to amplitude
    if config.amp_thermal.enabled:

        logger.info("Applying thermal correction.")

        # Load the temperatures
        tdata = TempData.from_acq_h5(config.amp_thermal.filename)

        index = tdata.search_sensors(config.amp_thermal.sensor)[0]

        temp = tdata.datasets[config.amp_thermal.field][index]
        temp_func = scipy.interpolate.interp1d(tdata.time, temp,
                                               **config.amp_thermal.interp)

        itemp = temp_func(timestamp)
        dtemp = itemp[0] - itemp[1]

        flag_func = scipy.interpolate.interp1d(
            tdata.time, tdata.datasets['flag'][index].astype(np.float32),
            **config.amp_thermal.interp)

        dtemp_flag = np.all(flag_func(timestamp) == 1.0, axis=0)

        flag &= dtemp_flag[np.newaxis, np.newaxis, :]

        for gstr, igroup in zip(group_str, group_list):
            pstr = gstr[0]
            thermal_coeff = np.polyval(config.amp_thermal.coeff[pstr], freq)
            gthermal = 1.0 + thermal_coeff[:, np.newaxis, np.newaxis] * dtemp[
                np.newaxis, np.newaxis, :]

            amp[:, igroup, :] *= tools.invert_no_zero(gthermal)

    ## Compute common mode
    if config.subtract_common_mode_before:
        logger.info("Calculating common mode amplitude and phase.")
        cmn_amp, flag_cmn_amp = compute_common_mode(amp,
                                                    flag,
                                                    group_list_noref,
                                                    median=False)
        cmn_phi, flag_cmn_phi = compute_common_mode(phi,
                                                    flag,
                                                    group_list_noref,
                                                    median=False)

        # Subtract common mode (from phase only)
        logger.info("Subtracting common mode phase.")
        group_flag = np.zeros((ngroup, ninput), dtype=np.bool)
        for gg, igroup in enumerate(group_list):
            group_flag[gg, igroup] = True
            phi[:,
                igroup, :] = phi[:, igroup, :] - cmn_phi[:, gg, np.newaxis, :]

            for iref in index_phase_ref:
                if (iref is not None) and (iref in igroup):
                    flag[:, iref, :] = flag_cmn_phi[:, gg, :]

    ## If requested, determine and subtract a delay template
    if config.fit_delay_before:
        logger.info("Fitting delay template.")
        omega = timing.FREQ_TO_OMEGA * freq

        tau, tau_flag, _ = construct_delay_template(
            omega,
            phi,
            c_flag & flag,
            min_num_freq_for_delay_fit=config.min_num_freq_for_delay_fit)

        # Compute residuals
        logger.info("Subtracting delay template.")
        phi = phi - tau[np.newaxis, :, :] * omega[:, np.newaxis, np.newaxis]

    ## Normalize by median over time
    logger.info("Calculating median amplitude and phase.")
    med_amp = np.zeros((nfreq, ninput, ndiff), dtype=amp.dtype)
    med_phi = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)

    count_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=np.int)
    stat_flag_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=np.bool)

    def weighted_mean(yy, ww, axis=-1):
        return np.sum(ww * yy, axis=axis) * tools.invert_no_zero(
            np.sum(ww, axis=axis))

    for dd in range(ndiff):

        this_diff = np.flatnonzero(lbl_diff == dd)

        this_flag = flag[:, :, this_diff]

        this_amp = amp[:, :, this_diff]
        this_amp_err = this_amp * frac_err_cal[:, :,
                                               this_diff] * this_flag.astype(
                                                   np.float32)

        this_phi = phi[:, :, this_diff]
        this_phi_err = frac_err_cal[:, :, this_diff] * this_flag.astype(
            np.float32)

        count_by_diff[:, :, dd] = np.sum(this_flag, axis=-1, dtype=np.int)
        stat_flag_by_diff[:, :,
                          dd] = count_by_diff[:, :,
                                              dd] > config.min_num_transit

        if config.weighted_mean == 2:
            logger.info("Calculating inverse variance weighted mean.")
            med_amp[:, :,
                    dd] = weighted_mean(this_amp,
                                        tools.invert_no_zero(this_amp_err**2),
                                        axis=-1)
            med_phi[:, :,
                    dd] = weighted_mean(this_phi,
                                        tools.invert_no_zero(this_phi_err**2),
                                        axis=-1)

        elif config.weighted_mean == 1:
            logger.info("Calculating uniform weighted mean.")
            med_amp[:, :, dd] = weighted_mean(this_amp,
                                              this_flag.astype(np.float32),
                                              axis=-1)
            med_phi[:, :, dd] = weighted_mean(this_phi,
                                              this_flag.astype(np.float32),
                                              axis=-1)

        else:
            logger.info("Calculating median value.")
            for ff in range(nfreq):
                for ii in range(ninput):
                    if np.any(this_flag[ff, ii, :]):
                        med_amp[ff, ii, dd] = wq.median(
                            this_amp[ff, ii, :],
                            this_flag[ff, ii, :].astype(np.float32))
                        med_phi[ff, ii, dd] = wq.median(
                            this_phi[ff, ii, :],
                            this_flag[ff, ii, :].astype(np.float32))

    damp = np.zeros_like(amp)
    dphi = np.zeros_like(phi)
    for dd in range(ndiff):
        this_diff = np.flatnonzero(lbl_diff == dd)
        damp[:, :, this_diff] = amp[:, :, this_diff] * tools.invert_no_zero(
            med_amp[:, :, dd, np.newaxis]) - 1.0
        dphi[:, :,
             this_diff] = phi[:, :, this_diff] - med_phi[:, :, dd, np.newaxis]

    # Compute common mode
    if not config.subtract_common_mode_before:
        logger.info("Calculating common mode amplitude and phase.")
        cmn_amp, flag_cmn_amp = compute_common_mode(damp,
                                                    flag,
                                                    group_list_noref,
                                                    median=True)
        cmn_phi, flag_cmn_phi = compute_common_mode(dphi,
                                                    flag,
                                                    group_list_noref,
                                                    median=True)

        # Subtract common mode (from phase only)
        logger.info("Subtracting common mode phase.")
        group_flag = np.zeros((ngroup, ninput), dtype=np.bool)
        for gg, igroup in enumerate(group_list):
            group_flag[gg, igroup] = True
            dphi[:, igroup, :] = dphi[:, igroup, :] - cmn_phi[:, gg,
                                                              np.newaxis, :]

            for iref in index_phase_ref:
                if (iref is not None) and (iref in igroup):
                    flag[:, iref, :] = flag_cmn_phi[:, gg, :]

    ## Compute RMS
    logger.info("Calculating RMS of amplitude and phase.")
    mad_amp = np.zeros((nfreq, ninput), dtype=amp.dtype)
    std_amp = np.zeros((nfreq, ninput), dtype=amp.dtype)

    mad_phi = np.zeros((nfreq, ninput), dtype=phi.dtype)
    std_phi = np.zeros((nfreq, ninput), dtype=phi.dtype)

    mad_amp_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=amp.dtype)
    std_amp_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=amp.dtype)

    mad_phi_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)
    std_phi_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)

    for ff in range(nfreq):
        for ii in range(ninput):
            this_flag = flag[ff, ii, :]
            if np.any(this_flag):
                std_amp[ff, ii] = np.std(damp[ff, ii, this_flag])
                std_phi[ff, ii] = np.std(dphi[ff, ii, this_flag])

                mad_amp[ff, ii] = 1.48625 * wq.median(
                    np.abs(damp[ff, ii, :]), this_flag.astype(np.float32))
                mad_phi[ff, ii] = 1.48625 * wq.median(
                    np.abs(dphi[ff, ii, :]), this_flag.astype(np.float32))

                for dd in range(ndiff):
                    this_diff = this_flag & (lbl_diff == dd)
                    if np.any(this_diff):

                        std_amp_by_diff[ff, ii, dd] = np.std(damp[ff, ii,
                                                                  this_diff])
                        std_phi_by_diff[ff, ii, dd] = np.std(dphi[ff, ii,
                                                                  this_diff])

                        mad_amp_by_diff[ff, ii, dd] = 1.48625 * wq.median(
                            np.abs(damp[ff, ii, :]),
                            this_diff.astype(np.float32))
                        mad_phi_by_diff[ff, ii, dd] = 1.48625 * wq.median(
                            np.abs(dphi[ff, ii, :]),
                            this_diff.astype(np.float32))

    ## Construct delay template
    if not config.fit_delay_before:
        logger.info("Fitting delay template.")
        omega = timing.FREQ_TO_OMEGA * freq

        tau, tau_flag, _ = construct_delay_template(
            omega,
            dphi,
            c_flag & flag,
            min_num_freq_for_delay_fit=config.min_num_freq_for_delay_fit)

        # Compute residuals
        logger.info("Subtracting delay template from phase.")
        resid = (dphi - tau[np.newaxis, :, :] *
                 omega[:, np.newaxis, np.newaxis]) * flag.astype(np.float32)

    else:
        resid = dphi

    tau_count = np.sum(tau_flag, axis=-1, dtype=np.int)
    tau_stat_flag = tau_count > config.min_num_transit

    tau_count_by_diff = np.zeros((ninput, ndiff), dtype=np.int)
    tau_stat_flag_by_diff = np.zeros((ninput, ndiff), dtype=np.bool)
    for dd in range(ndiff):
        this_diff = np.flatnonzero(lbl_diff == dd)
        tau_count_by_diff[:, dd] = np.sum(tau_flag[:, this_diff],
                                          axis=-1,
                                          dtype=np.int)
        tau_stat_flag_by_diff[:,
                              dd] = tau_count_by_diff[:,
                                                      dd] > config.min_num_transit

    ## Calculate statistics of residuals
    std_resid = np.zeros((nfreq, ninput), dtype=phi.dtype)
    mad_resid = np.zeros((nfreq, ninput), dtype=phi.dtype)

    std_resid_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)
    mad_resid_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype)

    for ff in range(nfreq):
        for ii in range(ninput):
            this_flag = flag[ff, ii, :]
            if np.any(this_flag):
                std_resid[ff, ii] = np.std(resid[ff, ii, this_flag])
                mad_resid[ff, ii] = 1.48625 * wq.median(
                    np.abs(resid[ff, ii, :]), this_flag.astype(np.float32))

                for dd in range(ndiff):
                    this_diff = this_flag & (lbl_diff == dd)
                    if np.any(this_diff):
                        std_resid_by_diff[ff, ii,
                                          dd] = np.std(resid[ff, ii,
                                                             this_diff])
                        mad_resid_by_diff[ff, ii, dd] = 1.48625 * wq.median(
                            np.abs(resid[ff, ii, :]),
                            this_diff.astype(np.float32))

    ## Calculate statistics of delay template
    mad_tau = np.zeros((ninput, ), dtype=phi.dtype)
    std_tau = np.zeros((ninput, ), dtype=phi.dtype)

    mad_tau_by_diff = np.zeros((ninput, ndiff), dtype=phi.dtype)
    std_tau_by_diff = np.zeros((ninput, ndiff), dtype=phi.dtype)

    for ii in range(ninput):
        this_flag = tau_flag[ii]
        if np.any(this_flag):
            std_tau[ii] = np.std(tau[ii, this_flag])
            mad_tau[ii] = 1.48625 * wq.median(np.abs(tau[ii]),
                                              this_flag.astype(np.float32))

            for dd in range(ndiff):
                this_diff = this_flag & (lbl_diff == dd)
                if np.any(this_diff):
                    std_tau_by_diff[ii, dd] = np.std(tau[ii, this_diff])
                    mad_tau_by_diff[ii, dd] = 1.48625 * wq.median(
                        np.abs(tau[ii]), this_diff.astype(np.float32))

    ## Define output
    res = {
        "timestamp": {
            "data": timestamp,
            "axis": ["div", "time"]
        },
        "is_daytime": {
            "data": is_daytime,
            "axis": ["div", "time"]
        },
        "csd": {
            "data": kcsd,
            "axis": ["div", "time"]
        },
        "pair_map": {
            "data": lbl_diff,
            "axis": ["time"]
        },
        "pair_count": {
            "data": cnt_diff,
            "axis": ["pair"]
        },
        "gain": {
            "data": gaincal,
            "axis": ["freq", "input", "time"]
        },
        "frac_err": {
            "data": frac_err_cal,
            "axis": ["freq", "input", "time"]
        },
        "flags/gain": {
            "data": flag,
            "axis": ["freq", "input", "time"],
            "flag": True
        },
        "flags/gain_conservative": {
            "data": c_flag,
            "axis": ["freq", "input", "time"],
            "flag": True
        },
        "flags/count": {
            "data": count,
            "axis": ["freq", "input"],
            "flag": True
        },
        "flags/stat": {
            "data": stat_flag,
            "axis": ["freq", "input"],
            "flag": True
        },
        "flags/count_by_pair": {
            "data": count_by_diff,
            "axis": ["freq", "input", "pair"],
            "flag": True
        },
        "flags/stat_by_pair": {
            "data": stat_flag_by_diff,
            "axis": ["freq", "input", "pair"],
            "flag": True
        },
        "med_amp": {
            "data": med_amp,
            "axis": ["freq", "input", "pair"]
        },
        "med_phi": {
            "data": med_phi,
            "axis": ["freq", "input", "pair"]
        },
        "flags/group_flag": {
            "data": group_flag,
            "axis": ["group", "input"],
            "flag": True
        },
        "cmn_amp": {
            "data": cmn_amp,
            "axis": ["freq", "group", "time"]
        },
        "cmn_phi": {
            "data": cmn_phi,
            "axis": ["freq", "group", "time"]
        },
        "amp": {
            "data": damp,
            "axis": ["freq", "input", "time"]
        },
        "phi": {
            "data": dphi,
            "axis": ["freq", "input", "time"]
        },
        "std_amp": {
            "data": std_amp,
            "axis": ["freq", "input"]
        },
        "std_amp_by_pair": {
            "data": std_amp_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "mad_amp": {
            "data": mad_amp,
            "axis": ["freq", "input"]
        },
        "mad_amp_by_pair": {
            "data": mad_amp_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "std_phi": {
            "data": std_phi,
            "axis": ["freq", "input"]
        },
        "std_phi_by_pair": {
            "data": std_phi_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "mad_phi": {
            "data": mad_phi,
            "axis": ["freq", "input"]
        },
        "mad_phi_by_pair": {
            "data": mad_phi_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "tau": {
            "data": tau,
            "axis": ["input", "time"]
        },
        "flags/tau": {
            "data": tau_flag,
            "axis": ["input", "time"],
            "flag": True
        },
        "flags/tau_count": {
            "data": tau_count,
            "axis": ["input"],
            "flag": True
        },
        "flags/tau_stat": {
            "data": tau_stat_flag,
            "axis": ["input"],
            "flag": True
        },
        "flags/tau_count_by_pair": {
            "data": tau_count_by_diff,
            "axis": ["input", "pair"],
            "flag": True
        },
        "flags/tau_stat_by_pair": {
            "data": tau_stat_flag_by_diff,
            "axis": ["input", "pair"],
            "flag": True
        },
        "std_tau": {
            "data": std_tau,
            "axis": ["input"]
        },
        "std_tau_by_pair": {
            "data": std_tau_by_diff,
            "axis": ["input", "pair"]
        },
        "mad_tau": {
            "data": mad_tau,
            "axis": ["input"]
        },
        "mad_tau_by_pair": {
            "data": mad_tau_by_diff,
            "axis": ["input", "pair"]
        },
        "resid_phi": {
            "data": resid,
            "axis": ["freq", "input", "time"]
        },
        "std_resid_phi": {
            "data": std_resid,
            "axis": ["freq", "input"]
        },
        "std_resid_phi_by_pair": {
            "data": std_resid_by_diff,
            "axis": ["freq", "input", "pair"]
        },
        "mad_resid_phi": {
            "data": mad_resid,
            "axis": ["freq", "input"]
        },
        "mad_resid_phi_by_pair": {
            "data": mad_resid_by_diff,
            "axis": ["freq", "input", "pair"]
        },
    }

    ## Create the output container
    logger.info("Creating StabilityData container.")
    data = StabilityData()

    data.create_index_map(
        "div", np.array(["numerator", "denominator"], dtype=np.string_))
    data.create_index_map("pair", np.array(uniq_diff, dtype=np.string_))
    data.create_index_map("group", np.array(group_str, dtype=np.string_))

    data.create_index_map("freq", freq)
    data.create_index_map("input", inputs)
    data.create_index_map("time", timestamp[0, :])

    logger.info("Writing datsets to container.")
    for name, dct in res.iteritems():
        is_flag = dct.get('flag', False)
        if is_flag:
            dset = data.create_flag(name.split('/')[-1], data=dct['data'])
        else:
            dset = data.create_dataset(name, data=dct['data'])

        dset.attrs['axis'] = np.array(dct['axis'], dtype=np.string_)

    data.attrs['phase_ref'] = np.array(
        [iref for iref in index_phase_ref if iref is not None])

    # Determine the output filename and save results
    start_time, end_time = ephemeris.unix_to_datetime(
        np.percentile(timestamp, [0, 100]))
    tfmt = "%Y%m%d"
    night_str = 'night_' if not np.any(is_daytime) else ''
    output_file = os.path.join(
        config.output_dir, "%s_%s_%sraw_stability_data.h5" %
        (start_time.strftime(tfmt), end_time.strftime(tfmt), night_str))

    logger.info("Saving results to %s." % output_file)
    data.save(output_file)
コード例 #4
0
from ch_util import rfi
from ch_util import finder

from ch_pipeline.core import containers
from ch_pipeline.analysis.flagging import daytime_flag

from complex_gain.temps import TempData
from complex_gain.sutil import construct_delay_template, compute_common_mode

###################################################
# default variables
###################################################

# Directory holding this module; the defaults file and the fallback log file
# are both resolved relative to it.
_MODULE_DIR = os.path.dirname(os.path.realpath(__file__))

# Baseline configuration.  The ':stability' suffix is appended to the path
# string handed to load_yaml_config (presumably it names the section of
# defaults.yaml to load -- confirm against wtl.config).
DEFAULTS = NameSpace(
    load_yaml_config(os.path.join(_MODULE_DIR, 'defaults.yaml') + ':stability'))

# Log destination: the STABILITY_LOG_FILE environment variable takes
# precedence, otherwise stability.log next to this module is used.
LOG_FILE = os.environ.get('STABILITY_LOG_FILE',
                          os.path.join(_MODULE_DIR, 'stability.log'))

DEFAULT_LOGGING = {
    'formatters': {
        'std': {
            'format': "%(asctime)s %(levelname)s %(name)s: %(message)s",
            'datefmt': "%m/%d %H:%M:%S"
        },
    },
    'handlers': {
        'stderr': {
コード例 #5
0
ファイル: regress_temp.py プロジェクト: ssiegelx/complex_gain
def main(filename, config_file=None, logging_params=DEFAULT_LOGGING):
    """Regress stability datasets against differenced sensor temperatures.

    Loads the configured datasets and flags from a stability file,
    interpolates the configured temperature field to the stored timestamps,
    differences the two interpolated temperatures along the first timestamp
    axis, fits every configured dataset with `TempRegression`, and saves the
    data object (with the temperature features and fit products added).

    Parameters
    ----------
    filename : str
        File read with `StabilityData.from_acq_h5`.  The output path is this
        name with its extension replaced by
        ``'_' + config.output_suffix + '.h5'``.
    config_file : str, optional
        YAML file whose contents are merged over a copy of `DEFAULTS`.
    logging_params : dict
        Passed unchanged to `log.setup_logging`.

    Notes
    -----
    `config.datasets` and `config.flags` are paired with `zip`, so any
    surplus entries in the longer list are ignored.
    """

    # Load config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Setup logging
    log.setup_logging(logging_params)
    logger = log.get_logger(__name__)

    # Load the data
    dsets = (config.datasets +
             ['flags/%s' % name for name in config.flags] +
             ['timestamp', 'pair_map'])

    logger.info("Requesting datasets: %s" % str(dsets))

    data = StabilityData.from_acq_h5(filename, datasets=dsets)

    logger.info("Loaded datasets: %s" % str(data.datasets.keys()))

    # Load the temperatures
    tdata = TempData.from_acq_h5(config.temp_filename)

    # Interpolate requested temperatures to time of transits
    if config.sensors is not None:
        index = np.sort(
            np.concatenate(
                tuple([tdata.search_sensors(name)
                       for name in config.sensors])))
    else:
        index = slice(None)

    temp = tdata.datasets[config.temp_field][index]
    temp_func = scipy.interpolate.interp1d(tdata.time,
                                           temp,
                                           axis=-1,
                                           **config.interp)

    # Read the timestamps once; they are reused for the flag interpolation.
    timestamp = data.datasets['timestamp'][:]
    itemp = temp_func(timestamp)

    # `index` is a slice when every sensor is used, and a slice cannot be
    # subscripted to recover the sensor belonging to a given row.  Build the
    # explicit row -> sensor positions for that case.
    if isinstance(index, slice):
        sensor_index = np.arange(itemp.shape[0])
    else:
        sensor_index = index

    # Difference temperatures between two transits.  Each row of `itemp` is
    # indexed as stemp[0] / stemp[1] below, and `dtemp` is laid out as
    # (last axis of itemp, sensor).
    feature = []
    dtemp = np.zeros((itemp.shape[-1], itemp.shape[0]), dtype=itemp.dtype)

    # Coefficients for the '_ns_dist' features.  They depend only on the
    # pair of sources, not on the sensor, so they are built at most once.
    ns_coeff = None

    for isensor, stemp in enumerate(itemp):

        sensor_name = tdata.sensor[sensor_index[isensor]]

        if config.is_ns_dist and config.is_ns_dist[isensor]:

            feature.append(sensor_name + '_ns_dist')

            if ns_coeff is None:
                ns_coeff = np.array([[
                    np.sin(
                        np.radians(FluxCatalog[ss].dec -
                                   ephemeris.CHIMELATITUDE))
                    for ss in pair.decode("UTF-8").split('/')
                ] for pair in data.index_map['pair'][data.datasets['pair_map']]
                                     ]).T

            dtemp[:, isensor] = (ns_coeff[0] * stemp[0] -
                                 ns_coeff[1] * stemp[1])

        else:
            feature.append(sensor_name)
            dtemp[:, isensor] = stemp[0] - stemp[1]

    # Generate flags for the temperature data.  A time sample is kept only if
    # the interpolated flag equals exactly 1.0 for every sensor and for both
    # entries along the first timestamp axis.
    flag_func = scipy.interpolate.interp1d(
        tdata.time,
        tdata.datasets['flag'][index].astype(np.float32),
        axis=-1,
        **config.interp)

    dtemp_flag = np.all(flag_func(timestamp) == 1.0, axis=(0, 1))

    # Add temperature information to data object.
    # np.bytes_ is used instead of the np.string_ alias, which NumPy 2.0
    # removed; both name the same type on older NumPy.
    data.create_index_map('feature', np.array(feature, dtype=np.bytes_))

    dset = data.create_dataset("temp", data=dtemp)
    dset.attrs['axis'] = np.array(['time', 'feature'], dtype=np.bytes_)

    # The flag was reduced over the sensor axis above, so it only has a
    # time axis.
    dset = data.create_flag("temp", data=dtemp_flag)
    dset.attrs['axis'] = np.array(['time'], dtype=np.bytes_)

    # Perform the fit
    for dkey, fkey in zip(config.datasets, config.flags):

        logger.info("Now fitting %s using %s flags" % (dkey, fkey))

        this_data = data.datasets[dkey][:]

        # Broadcast the 1-D temperature flag against the last axis of the
        # dataset flag.
        expand = tuple([None] * (this_data.ndim - 1) + [slice(None)])
        this_flag = data.flags[fkey][:] & dtemp_flag[expand]

        fitter = TempRegression(dtemp, this_data, flag=this_flag)
        fitter.process()

        axis = data.datasets[dkey].attrs['axis']

        # Save results.  'model' and 'resid' keep the axes of the fitted
        # dataset; their statistics drop the last axis.
        for out in ['model', 'resid']:
            dset = data.create_dataset('_'.join([config.prefix, out, dkey]),
                                       data=getattr(fitter, out))
            dset.attrs['axis'] = axis.copy()

            for stat in ['mad', 'std']:
                dset = data.create_dataset(
                    '_'.join([stat, config.prefix, out, dkey]),
                    data=getattr(fitter, '_'.join([stat, out])))
                dset.attrs['axis'] = axis[:-1].copy()

        for out in ['intercept', 'number']:
            dset = data.create_dataset('_'.join([config.prefix, out, dkey]),
                                       data=getattr(fitter, out))
            dset.attrs['axis'] = axis[:-1].copy()

        # Coefficients replace the last axis with the feature axis.
        dset = data.create_dataset('_'.join([config.prefix, 'coeff', dkey]),
                                   data=fitter.coeff)
        dset.attrs['axis'] = np.array(list(axis[:-1]) + ['feature'],
                                      dtype=np.bytes_)

    # Save the results to disk
    output_filename = os.path.splitext(
        filename)[0] + '_' + config.output_suffix + '.h5'

    data.save(output_filename)
コード例 #6
0
ファイル: regress_temp.py プロジェクト: ssiegelx/complex_gain
from wtl.namespace import NameSpace
from wtl.config import load_yaml_config

from ch_util.fluxcat import FluxCatalog
from ch_util import ephemeris

from temps import TempData
from stability import StabilityData

###################################################
# default variables
###################################################

# Directory holding this module; the defaults file and the fallback log file
# are both resolved relative to it.
_MODULE_DIR = os.path.dirname(os.path.realpath(__file__))

# Baseline configuration.  The ':regress_temp' suffix is appended to the path
# string handed to load_yaml_config (presumably it names the section of
# defaults.yaml to load -- confirm against wtl.config).
DEFAULTS = NameSpace(
    load_yaml_config(
        os.path.join(_MODULE_DIR, 'defaults.yaml') + ':regress_temp'))

# Log destination: the REGRESS_TEMP_LOG_FILE environment variable takes
# precedence, otherwise regress_temp.log next to this module is used.
LOG_FILE = os.environ.get('REGRESS_TEMP_LOG_FILE',
                          os.path.join(_MODULE_DIR, 'regress_temp.log'))

DEFAULT_LOGGING = {
    'formatters': {
        'std': {
            'format': "%(asctime)s %(levelname)s %(name)s: %(message)s",
            'datefmt': "%m/%d %H:%M:%S"
        },
    },
    'handlers': {
コード例 #7
0
def main(config_file=None, logging_params=DEFAULT_LOGGING):
    """Run ``solve_gain`` on every visibility sample of each source transit.

    The acquisition files found under ``config.data_dir/config.acq`` are handed
    to a ``TransitTracker`` to locate the transits of ``config.all_sources``.
    For each selected transit the visibilities are read one frequency and one
    time sample at a time, products that involve a bad input are set to zero,
    and ``solve_gain`` is called once per entry of ``config.offsets`` (and once
    per polarisation when ``config.sep_pol`` is set).  The returned eigenvalues,
    responses and response errors are collected and written to one HDF5 file
    per transit in ``config.output_dir``.

    Parameters
    ----------
    config_file : str or None
        YAML file whose contents are merged on top of ``DEFAULTS``.  When
        ``None`` only the defaults are used.
    logging_params : dict
        Passed unchanged to ``log.setup_logging``.
    """

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    # Set config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Set niceness.  os.nice adds an increment to the current value, so pass
    # the difference between the requested and the current niceness.
    current_niceness = os.nice(0)
    os.nice(config.niceness - current_niceness)
    mlog.info('Changing process niceness from %d to %d.  Confirm:  %d' %
              (current_niceness, config.niceness, os.nice(0)))

    # Find acquisition files
    acq_files = sorted(
        glob.glob(os.path.join(config.data_dir, config.acq, "*.h5")))
    nfiles = len(acq_files)

    # Find cal files.  They are only used to seed solve_gain with an initial
    # eigendecomposition, which is only requested when iterating in time.
    if config.time_iter and (config.cal_acq is not None):
        cal_files = sorted(
            glob.glob(os.path.join(config.data_dir, config.cal_acq, "*.h5")))
        ncal_files = len(cal_files)
        mlog.info('Found %d chimecal files.' % ncal_files)

        # NOTE(review): this is called even when the glob matched no file;
        # confirm how from_acq_h5 behaves on an empty list.
        cal_rdr = andata.CorrData.from_acq_h5(cal_files, datasets=())

    else:
        ncal_files = 0

    # Create transit tracker
    transit_tracker = TransitTracker(nsigma=config.nsigma, shift=config.shift)

    for name in config.all_sources:
        transit_tracker[name] = FluxCatalog[name].skyfield

    for acq_file in acq_files:
        transit_tracker.add_file(acq_file)

    transit_files = transit_tracker.get_transits()

    # Log every transit that was found together with the files that cover it
    for src, csd, is_day, files, start, stop in transit_files:
        mlog.info("%s | CSD %d | %d | (%d, %d)" %
                  (src, csd, is_day, start, stop))
        for ff in files:
            mlog.info("%s" % ff)

    mlog.info('-' * 80)

    # Specify some parameters for algorithm
    N = 2048  # size of the input axis of the output arrays

    noffset = len(config.offsets)

    if config.sep_pol:
        rank = 1
        cross_pol = False
        pol = np.array(['S', 'E'])
        # Inputs are grouped in blocks of 256: even-numbered blocks are
        # labelled 'S' and odd-numbered blocks are labelled 'E'.
        pol_s = np.array(
            [rr + 256 * xx for xx in range(0, 8, 2) for rr in range(256)])
        pol_e = np.array(
            [rr + 256 * xx for xx in range(1, 8, 2) for rr in range(256)])
        # Product indices of each polarisation; filled in lazily the first
        # time a visibility sample is processed.
        prod_ss = None
        prod_ee = None
    else:
        rank = 2
        cross_pol = config.cross_pol
        pol = np.array(['all'])

    npol = pol.size

    # Create file prefix and suffix
    prefix = []

    prefix.append("gain_solutions")

    if config.output_prefix is not None:
        prefix.append(config.output_prefix)

    prefix = '_'.join(prefix)

    suffix = []

    suffix.append("pol_%s" % '_'.join(pol))

    if config.time_iter:
        suffix.append("time_iter")
    else:
        suffix.append("niter_%d" % config.niter)

    if cross_pol:
        suffix.append("zerocross")
    else:
        suffix.append("keepcross")

    if config.normalize:
        suffix.append("normed")
    else:
        suffix.append("notnormed")

    suffix = '_'.join(suffix)

    # Loop over source transits
    for src, csd, is_day, files, start, stop in transit_files:

        if src not in config.all_sources:
            continue

        if (config.good_csd is not None) and (csd not in config.good_csd):
            continue

        mlog.info("%s | CSD %d | %d | (%d, %d)" %
                  (src, csd, is_day, start, stop))

        nfiles = len(files)

        # The file is written with h5py below; the '.pickle' extension is kept
        # so that the output names do not change.
        output_file = os.path.join(
            config.output_dir,
            "%s_%s_CSD_%d_%s.pickle" % (prefix, src, csd, suffix))

        mlog.info("Saving to:  %s" % output_file)

        # Get info about this set of files
        data = andata.CorrData.from_acq_h5(files,
                                           datasets=['flags/inputs'],
                                           start=start,
                                           stop=stop)

        ra = ephemeris.lsa(data.time)
        prod = data.prod

        fstart = config.freq_start if config.freq_start is not None else 0
        fstop = config.freq_stop if config.freq_stop is not None else data.freq.size
        freq_index = range(fstart, fstop)

        freq = data.freq[freq_index]

        ntime = ra.size
        nfreq = freq.size

        # Determine bad inputs: either read them from the configured file, or
        # take every input whose flag is not set at all times of this transit.
        if config.bad_input_file is None or not os.path.isfile(
                config.bad_input_file):
            bad_input = np.flatnonzero(
                ~np.all(data.flags['inputs'][:], axis=-1))
        else:
            # Binary mode is required to unpickle on Python 3.  The file comes
            # from the local configuration; never point this at untrusted data.
            with open(config.bad_input_file, 'rb') as handler:
                bad_input = pickle.load(handler)

        mlog.info("%d inputs flagged as bad." % bad_input.size)

        # Explicit integer dtype: without it an empty list yields a float
        # array, which numpy refuses to use as an index further down.
        bad_prod = np.array([
            ii for ii, pp in enumerate(prod)
            if (pp[0] in bad_input) or (pp[1] in bad_input)
        ], dtype=int)

        # Determine time range of each file.  findex/tindex give, for every
        # time sample of the transit, the file it lives in and its index
        # within that file.
        findex = []
        tindex = []
        for ii, filename in enumerate(files):
            subdata = andata.CorrData.from_acq_h5(filename, datasets=())

            findex += [ii] * subdata.ntime
            tindex += range(subdata.ntime)

        findex = np.array(findex[start:stop])
        tindex = np.array(tindex[start:stop])

        frng = []
        for ii in range(nfiles):

            this_file = np.flatnonzero(findex == ii)
            this_tindex = tindex[this_file]

            frng.append((this_tindex.min(), this_tindex.max() + 1))

        # Create arrays to hold the results
        ores = {}
        ores['index_map'] = {}
        ores['index_map']['ra'] = ra
        ores['index_map']['time'] = data.time
        ores['index_map']['freq'] = freq
        ores['index_map']['offsets'] = np.array(config.offsets)
        ores['index_map']['pol'] = pol

        ores['evalue'] = np.zeros((noffset, nfreq, ntime, N), dtype=np.float32)
        ores['resp'] = np.zeros((noffset, nfreq, ntime, N, config.neigen),
                                dtype=np.complex64)
        ores['resp_err'] = np.zeros((noffset, nfreq, ntime, N, config.neigen),
                                    dtype=np.float32)

        # Loop over frequencies
        for ff, find in enumerate(freq_index):

            mlog.info("Freq %d of %d." % (ff + 1, nfreq))

            # cnt counts the time samples processed for this frequency; ev and
            # evec are fed back into solve_gain from one call to the next.
            cnt = 0
            ev, evec = None, None

            if ncal_files > 0:

                # Calibration frequency closest to the one being processed
                ifc = int(np.argmin(np.abs(freq[ff] - cal_rdr.freq)))

                # Calibration sample with a finite timestamp closest to the
                # first time sample of the transit (cnt is 0 here); the read
                # below starts one sample before it.
                # NOTE(review): itc is -1 when the closest sample is the very
                # first one -- confirm how from_acq_h5 treats a negative start.
                diff_time = np.abs(data.time[cnt] - cal_rdr.time)
                good_diff = np.flatnonzero(np.isfinite(diff_time))
                itc = int(good_diff[np.argmin(diff_time[good_diff])]) - 1

                mlog.info("%d calibration samples with finite time." %
                          (good_diff.size, ))
                mlog.info("Data time %s, calibration time %s." %
                          (data.time[cnt], cal_rdr.time[itc]))

                cal = andata.CorrData.from_acq_h5(cal_files,
                                                  datasets=['eval', 'evec'],
                                                  freq_sel=ifc,
                                                  start=itc,
                                                  stop=itc + 2)

                mlog.info("Loaded calibration times %s." % (cal.time, ))

                mlog.info(
                    "Using eigenvectors from %d time sample (%0.2f sec offset) to initialize backfill."
                    % (itc, cal.time[0] - data.time[cnt]))

                mlog.info(
                    "Using eigenvectors for freq %0.2f MHz (for %0.2f MHz)." %
                    (cal.freq[0], freq[ff]))

                # The eigenvalue axis is reversed before use.
                ev = cal['eval'][0, ::-1, 0]
                evec = cal['evec'][0, ::-1, :, 0].T

                mlog.info("Initial eval shape %s, evec shape %s" %
                          (str(ev.shape), str(evec.shape)))

            # Loop over files
            for ii, filename in enumerate(files):

                aa, bb = frng[ii]

                # Loop over times
                for tt in range(aa, bb):

                    t0 = time.time()

                    mlog.info("Time %d of %d." % (cnt + 1, ntime))

                    # Load visibilities
                    with h5py.File(filename, 'r') as hf:

                        vis = hf['vis'][find, :, tt]

                    # Set bad products equal to zero
                    vis[bad_prod] = 0.0

                    # Different code if we are separating polarisations
                    if config.sep_pol:

                        # Build the per-polarisation product indices once and
                        # reuse them for every later sample.
                        if prod_ss is None:

                            ss_index = []
                            ee_index = []
                            for pind, pp in enumerate(prod):
                                if (pp[0] in pol_s) and (pp[1] in pol_s):
                                    ss_index.append(pind)

                                elif (pp[0] in pol_e) and (pp[1] in pol_e):
                                    ee_index.append(pind)

                            prod_ss = np.array(ss_index, dtype=int)
                            prod_ee = np.array(ee_index, dtype=int)

                            mlog.info("Product sizes: %d, %d" %
                                      (prod_ss.size, prod_ee.size))

                        # Loop over polarisations
                        for ipol, (input_pol,
                                   prod_pol) in enumerate([(pol_s, prod_ss),
                                                           (pol_e, prod_ee)]):

                            visp = vis[prod_pol]

                            mlog.info("pol %s, visibility size:  %d" %
                                      (pol[ipol], visp.size))

                            # Loop over offsets
                            for oo, off in enumerate(config.offsets):

                                mlog.info(
                                    "pol %s, rank %d, niter %d, offset %d, cross_pol %s, neigen %d, intracyl_diag %d"
                                    % (pol[ipol], rank, config.niter, off,
                                       cross_pol, config.neigen,
                                       int(config.intracyl_diag)))

                                ev, evec, rr, rre = solve_gain(
                                    visp,
                                    cutoff=off,
                                    intracyl_diag=config.intracyl_diag,
                                    cross_pol=cross_pol,
                                    normalize=config.normalize,
                                    niter=config.niter,
                                    neigen=config.neigen,
                                    rank=rank,
                                    cyl_size=config.cyl_size,
                                    time_iter=config.time_iter,
                                    eigenvalue=ev,
                                    eigenvector=evec)

                                # Results only fill the inputs of this
                                # polarisation along the input axis.
                                ores['evalue'][oo, ff, cnt, input_pol] = ev
                                ores['resp'][oo, ff, cnt, input_pol, :] = rr
                                ores['resp_err'][oo, ff, cnt,
                                                 input_pol, :] = rre

                    else:

                        # Loop over offsets
                        for oo, off in enumerate(config.offsets):

                            mlog.info(
                                "rank %d, niter %d, offset %d, cross_pol %s, neigen %d, intracyl_diag %d"
                                % (rank, config.niter, off, cross_pol,
                                   config.neigen, int(config.intracyl_diag)))

                            ev, evec, rr, rre = solve_gain(
                                vis,
                                cutoff=off,
                                intracyl_diag=config.intracyl_diag,
                                cross_pol=cross_pol,
                                normalize=config.normalize,
                                niter=config.niter,
                                neigen=config.neigen,
                                rank=rank,
                                cyl_size=config.cyl_size,
                                time_iter=config.time_iter,
                                eigenvalue=ev,
                                eigenvector=evec)

                            ores['evalue'][oo, ff, cnt, :] = ev
                            ores['resp'][oo, ff, cnt, :, :] = rr
                            ores['resp_err'][oo, ff, cnt, :, :] = rre

                    # Increment time counter
                    cnt += 1

                    # Print time elapsed
                    mlog.info("Took %0.1f seconds." % (time.time() - t0, ))

        # Save to HDF5 file: nested dictionaries become groups, everything
        # else becomes a dataset at the top level.
        with h5py.File(output_file, 'w') as handler:

            handler.attrs['src'] = src
            handler.attrs['csd'] = csd

            for key, val in ores.items():

                if isinstance(val, dict):
                    group = handler.create_group(key)
                    for kk, vv in val.items():
                        group.create_dataset(kk, data=vv[:])
                else:
                    handler.create_dataset(key, data=val[:])

        # Remove this source from list, so that its later transits are skipped
        # by the membership test at the top of the loop.
        if config.single_csd:
            config.all_sources.remove(src)