Esempio n. 1
0
def main():
    """Align relatively calibrated dataframes in LAST and merge them.

    Command-line entry point. The relative calibration dataframe for
    ``jd_time`` is loaded and, unless its JD is the anchor day, its redundant
    visibility solutions are passed through ``degVis`` using the amplitude and
    tilt parameters read from the matching degenerate comparison dataframe.
    The same is then done for every JD in ``--jd_comp`` for which a relative
    calibration dataframe exists in ``--rel_dir`` (aligned to ``jd_time`` via
    ``align_df``). All frames are tagged with their JD, concatenated, indexed
    by (freq, time_int, JD) and pickled.

    :return: None; the merged dataframe is written to disk as a pickle
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Degenerately transform, align and merge redundantly calibrated
    dataframes
    """))
    parser.add_argument('jd_time',
                        help='Fractional JD time of dataframe to \
                        align other dataframes to',
                        metavar='JD',
                        type=str)
    parser.add_argument('-j', '--jd_comp', required=True, metavar='J',
                        type=str, help='JDs of dataframes to align')
    parser.add_argument('-a', '--jd_anchor', required=True, metavar='A',
                        type=int, help='JD of anchor day so that all other JDs \
                        are transformed to the degenerate space of this day')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-d', '--dist', required=True, metavar='D',
                        type=str, help='Noise distribution for calibration \
                        {"cauchy", "gaussian"}')
    parser.add_argument('-r', '--rel_dir', required=False, default='rel_dfs', metavar='R',
                        type=str, help='Directory in which relative calibration \
                        results dataframes are located')
    parser.add_argument('-g', '--deg_dir', required=False, default='deg_dfs', metavar='G',
                        type=str, help='Directory in which degenerate comparison \
                        results dataframes are located')
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-u', '--out_dir', required=False, default=None, metavar='U',
                        type=str, help='Out directory to store dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    # Default output name is built from the first LAST of the reference dataset
    out_fn = args.out
    if out_fn is None:
        last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
        last = last_df[last_df['JD_time'] == float(
            args.jd_time)]['LASTs'].values[0][0]
        out_fn = 'aligned_red_deg.{}.{}.{}'.format('{:.4f}'.format(last),
                                                   args.pol, args.dist)
    if args.out_dir is not None:
        # makedirs also copes with nested paths and an already existing directory
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)
    out_pkl = fn_format(out_fn, 'pkl')

    # Resolve the JDs to compare: a named preset, an underscore-separated list,
    # or whatever mod_str_arg parses; explicit lists are restricted to idr2_jds
    jd_comp = args.jd_comp
    if jd_comp == 'idr2_jds':
        jd_comp = numpy.asarray(idr2_jds)
    elif jd_comp == 'idr2_jdsx':
        jd_comp = numpy.asarray(idr2_jdsx)
    else:
        if '_' in jd_comp:
            jd_comp = numpy.asarray(jd_comp.split('_'), dtype=int)
        else:
            jd_comp = mod_str_arg(args.jd_comp)
        jd_comp = numpy.intersect1d(jd_comp, idr2_jds)
    # The reference day is handled separately, so drop it from the comparison set
    jdl_day = int(float(args.jd_time))
    if jdl_day in jd_comp:
        jd_comp = numpy.delete(jd_comp, numpy.where(jd_comp == jdl_day)[0])

    print('Finding relatively calibrated dataframes on JDs {} ({} polarization '
          'and {} assumed noise during the calibration) and aligning them '
          'in LAST to dataset {}, with all relatively calibrated datasets being '
          'transformed to the degenerate space of {}.\n'.
          format(' '.join(map(str, numpy.sort(numpy.append([jdl_day], jd_comp)))),
                 args.pol, args.dist, args.jd_time, args.jd_anchor))

    if os.path.exists(out_pkl):
        print('Overwriting {}.\n'.format(out_pkl))

    indices = ['freq', 'time_int']
    new_indices = ['freq', 'time_int', 'JD']
    resid_cols = ['residual', 'norm_residual']
    min_list = ['success', 'status', 'message', 'fun', 'nit']

    md_path = os.path.join(args.rel_dir,
                           'rel_df.{}.{}.md.pkl'.format(args.jd_time, args.pol))
    with open(md_path, 'rb') as f:
        md = pickle.load(f)

    # Parameter columns are labelled by stringified position: the first
    # 2*no_unq_bls are visibility components, the next 2*no_ants are gains
    n_min = len(min_list)
    n_vis = md['no_unq_bls'] * 2
    vis_list = list(map(str, numpy.arange(n_vis).tolist()))
    gain_list = list(map(str, numpy.arange(
        n_vis, (md['no_unq_bls'] + md['no_ants']) * 2).tolist()))

    # Computed unconditionally: the per-JD loop below needs it even when the
    # reference dataset itself lies on the anchor day (previously a NameError)
    ant_sep = red_ant_sep(md['redg'], md['antpos'])

    def prep_deg(deg_frame):
        """Return the degenerate parameters indexed and sorted by (freq, time_int)."""
        deg = deg_frame[['0', '1', '2']].copy().reset_index()
        deg.rename(columns={'time_int1': 'time_int', '0': 'amp',
                            '1': 'tilt_x', '2': 'tilt_y'}, inplace=True)
        deg.set_index(indices, inplace=True)
        deg.sort_index(inplace=True)
        return deg

    def to_anchor_space(row):
        """Apply degVis to one row's visibility solutions.

        The row is expected to hold the min_list columns, then the visibility
        components, and the three joined degenerate parameters last.
        """
        vis = makeCArray(row[n_min:n_min + n_vis].values.astype(float))
        deg_params = row[-3:].values.astype(float)
        return pd.Series(decomposeCArray(degVis(ant_sep, vis, *deg_params)))

    rel_df_path = find_rel_df(args.jd_time, args.pol, args.dist, args.rel_dir)
    rel_df = pd.read_pickle(rel_df_path)
    rel_df.drop(columns=resid_cols + gain_list, inplace=True)

    if jdl_day != args.jd_anchor:
        rel_df_d = rel_df[min_list].copy()
        rel_df_d = rel_df_d.reindex(columns=rel_df_d.columns.values.tolist() +
                                    vis_list)

        deg_df_path = find_deg_df(args.jd_time, args.pol,
                                  'jd.{}'.format(args.jd_anchor),
                                  args.dist, args.deg_dir)
        deg_df_d = prep_deg(pd.read_pickle(deg_df_path))

        rel_df = rel_df.join(deg_df_d)
        rel_df_d[vis_list] = rel_df.apply(to_anchor_space, axis=1)
    else:
        # Reference dataset is the anchor day: no transformation applied
        rel_df_d = rel_df

    rel_df_d['JD'] = jdl_day
    rel_df_d.reset_index(inplace=True)
    rel_df_d.set_index(new_indices, inplace=True)
    rel_df_d.sort_index(inplace=True)

    # JDs for which relative calibration dataframes exist, taken from the
    # second dot-separated field of each matching file name
    available_jds = numpy.unique([
        os.path.basename(df).split('.')[1] for df in glob.glob(
            os.path.join(os.getcwd(), args.rel_dir, 'rel_df*pkl'))
    ]).astype(int)

    for jd_ci in jd_comp:
        if jd_ci not in available_jds:
            print(
                'No relative calibration dataframes available for {}: skipping.'
                .format(jd_ci))
            continue

        print('Aligning and adding {} to the resulting dataframe'.format(
            jd_ci))
        rel_dfk = align_df('rel', args.jd_time, jd_ci, args.rel_dir, args.dist,
                           args.pol)
        rel_dfk.drop(columns=resid_cols + gain_list, inplace=True)

        if int(jd_ci) != args.jd_anchor:
            deg_dfk = align_df('deg', args.jd_time, jd_ci, args.deg_dir,
                               args.dist, args.pol, JD_anchor=args.jd_anchor)

            # Degenerate transformation of redundant visibility solutions
            rel_dfk = rel_dfk.join(prep_deg(deg_dfk))

            rel_df_di = rel_dfk[min_list].copy()
            rel_df_di = rel_df_di.reindex(
                columns=rel_df_di.columns.values.tolist() + vis_list)
            rel_df_di[vis_list] = rel_dfk.apply(to_anchor_space, axis=1)
            rel_dfk = rel_df_di

        rel_dfk['JD'] = jd_ci
        rel_dfk.reset_index(inplace=True)
        rel_dfk.set_index(new_indices, inplace=True)
        rel_dfk.sort_index(inplace=True)
        rel_df_d = pd.concat([rel_df_d, rel_dfk])

    rel_df_d.sort_index(inplace=True)
    rel_df_d.to_pickle(out_pkl)
    print('Aligned dataframe pickled to {}'.format(out_pkl))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
Esempio n. 2
0
def main():
    """Run relative redundant calibration on one visibility dataset.

    Command-line entry point. For every requested (frequency channel, time
    integration) slice that is not flagged and not already present in the
    output files, a relative redundant calibration solve is run (``doRelCal``
    or, with ``--method RP``, ``doRelCalRP``). Each result is appended as a row
    to a csv file; afterwards the csv is read back, residuals are appended
    with ``append_residuals_rel`` and the dataframe is pickled. A metadata
    pickle is written next to the default output name if it does not exist.

    :return: None; results are written to csv / pickle files on disk
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Relative redundant calibration of visibilities

    Takes a given HERA visibility dataset in uvh5 file format and performs
    relative redundant calibration (up to the overall amplitude, overall
    phase, and phase gradient degenerate parameters) for each frequency channel
    and each time integration in the dataset.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the relative redundant calibration for each set of frequency channel and
    time integration.
    """))
    parser.add_argument('jd_time',
                        help='Fractional JD time of dataset to \
                        calibrate',
                        metavar='JD',
                        type=str)
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None, metavar='C',
                        type=str, help='Frequency channels to calibrate \
                        {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None, metavar='T',
                        type=str, help='Time integrations to calibrate \
                        {0, 59}')
    parser.add_argument('-f', '--flag_type', required=False, default='first',
                        metavar='F', type=str, help='Flag type e.g. "first", \
                        "omni", "abs"')
    parser.add_argument('-d', '--dist', required=True, metavar='D',
                        type=str, help='Fitting distribution for calibration \
                        {"cauchy", "gaussian"}')
    parser.add_argument('-m', '--method', required=False, default='cartesian',
                        metavar='M', type=str, help='Method to use - {"cartesian", \
                        "polar", "RP"}, where RP stands for reduced parameters')
    parser.add_argument('-l', '--logamp', required=False, action='store_true',
                        help='Use logamp method to force positive gain amplitudes')
    parser.add_argument('-g', '--tilt_reg', required=False, action='store_true',
                        help='Add regularization term to constrain tilt shifts to 0')
    parser.add_argument('-a', '--gphase_reg', required=False, action='store_true',
                        help='Add regularization term to constrain the gain phase mean')
    parser.add_argument('-i', '--initp_jd', required=False, default=None, metavar='I',
                        type=int, help='JD of to find datasets to reuse initial parameters')
    parser.add_argument('-v', '--noise', required=False, action='store_true',
                        help='Use noise from autos in nlogL calculations')
    parser.add_argument('-u', '--out_dir', required=False, default=None, metavar='U',
                        type=str, help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    # default_fn is kept separately because the metadata file is always named
    # after it, regardless of any custom --out name
    out_fn = args.out
    default_fn = 'rel_df.{}.{}.{}'.format(args.jd_time, args.pol, args.dist)
    if out_fn is None:
        out_fn = default_fn
    if args.out_dir is not None:
        # makedirs also copes with nested paths and an already existing directory
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)
        default_fn = os.path.join(args.out_dir, default_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if (csv_exists or pkl_exists) and args.new_df:
        # Write to fresh files instead of appending to the existing ones
        out_csv = new_fn(out_csv, None, startTime)
        out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
        csv_exists = False
        pkl_exists = False

    zen_fn = find_zen_file(args.jd_time)
    bad_ants = get_bad_ants(zen_fn)

    flag_type = args.flag_type
    if flag_type is not None:
        flag_fn = find_flag_file(args.jd_time, flag_type)
    else:
        flag_fn = None

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    hd = HERAData(zen_fn)

    # Human-readable channel / integration ranges for the banner message
    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(hd.Nfreqs - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(hd.Ntimes - 1)
    print('Running relative redundant calibration on visibility dataset {} for '
          'polarization {}, frequency channel(s) {} and time integration(s) {} '
          'with {} assumed noise distribution\n'.
          format(os.path.basename(zen_fn), args.pol, pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(hd.Nfreqs)
    if time_ints is None:
        time_ints = numpy.arange(hd.Ntimes)

    indices = ['freq', 'time_int']

    no_tints = len(time_ints)
    # (freq index, time index) pairs into freq_chans / time_ints
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    skip_cal = False
    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        # value -> position lookups for the requested channels / integrations
        cmap_f = dict(map(reversed, enumerate(freq_chans)))
        cmap_t = dict(map(reversed, enumerate(time_ints)))
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.index.values
        # a set gives O(1) membership tests when filtering iter_dims
        done = {(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if (f in freq_chans and t in time_ints)}
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not iter_dims:
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        grp = group_data(zen_fn, args.pol, freq_chans, time_ints,
                         bad_ants, flag_path=flag_fn, noise=args.noise)
        if not args.noise:
            _, RedG, cData = grp
            noisec = None
        else:
            _, RedG, cData, cNData = grp

        flags = cData.mask
        cData = cData.data

        # to get fields for the csv header
        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants + no_unq_bls) * 2
        if args.tilt_reg:
            ant_pos_arr = flt_ant_pos(hd.antpos, ants)
        else:
            ant_pos_arr = None

        # discarding 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        # remove flagged channels from iter_dims
        if True in flags:
            flg_chans = numpy.where(flags.all(axis=(1, 2)))[0]  # indices
            print('Flagged channels for visibility dataset {} are: {}\n'.
                  format(os.path.basename(zen_fn), freq_chans[flg_chans]))
            iter_dims = [
                idim for idim in iter_dims if idim[0] not in flg_chans
            ]
            if not iter_dims:  # check if slices to solve are empty
                print('All specified channels are flagged. Exiting.')
                sys.exit()

        if args.initp_jd is not None:
            # Reuse solutions from another JD, matched in LAST, as initial
            # parameters for the solves below
            jd_time2 = match_lst(args.jd_time, args.initp_jd)
            if len(str(jd_time2)) < 13:
                jd_time2 = str(
                    jd_time2
                ) + '0'  # add a trailing 0 that is omitted in float
            rel_df_path1 = find_rel_df(jd_time2, args.pol, args.dist)
            if isinstance(jd_time2, str):
                jd_time2 = float(jd_time2)

            last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
            last1 = last_df[last_df['JD_time'] == float(
                args.jd_time)]['LASTs'].values[0]
            last2 = last_df[last_df['JD_time'] == jd_time2]['LASTs'].values[0]
            _, offset = find_nearest(last2, last1[0])

            # Time integrations from `offset` onwards come from the matched
            # dataset, the earlier ones from the dataset in the next row of
            # the JD-LAST map
            rel_df1 = pd.read_pickle(rel_df_path1)
            rel_df1 = rel_df1[
                rel_df1.index.get_level_values('time_int') >= offset]

            next_row = numpy.where(last_df['JD_time'] == jd_time2)[0][0] + 1
            rel_df_path2 = find_rel_df(last_df.iloc[next_row]['JD_time'],
                                       args.pol, args.dist)
            rel_df2 = pd.read_pickle(rel_df_path2)
            rel_df2 = rel_df2[
                rel_df2.index.get_level_values('time_int') < offset]

            rel_df_c = pd.concat([rel_df1, rel_df2])

            # filter by specified channels and time integrations
            time_ints_offset = (time_ints + offset) % hd.Ntimes
            freq_flt = numpy.in1d(rel_df_c.index.get_level_values('freq'),
                                  freq_chans)
            tint_flt = numpy.in1d(rel_df_c.index.get_level_values('time_int'),
                                  time_ints_offset)
            rel_df_c = rel_df_c[freq_flt & tint_flt]

            # Append the matching time integration of rel_df_c as a third
            # element of every iteration tuple
            time_ints2 = numpy.tile(
                rel_df_c.index.get_level_values('time_int').unique().values,
                freq_chans.size)
            iter_dims = [
                idim + (tint, ) for idim, tint in zip(iter_dims, time_ints2)
            ]

            phase_reg_initp = True
        else:
            phase_reg_initp = False

        def cal(credg, distribution, coords, no_unq_bls, no_ants, logamp,
                tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCal: unconstrained
            minimizer using cartesian coordinates - this is the fastest solver

            Reads ``phase_reg_initp`` and ``slct_keys`` from the enclosing scope.

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood {'gaussian', 'cauchy'}
            :type distribution: str
            :param coords: Coordinate system in which gain and visibility parameters
            have been set up
            :type coords: str {"cartesian", "polar"}
            :param no_unq_bls: Number of unique baselines (equivalently the number of
            redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters is taken,
            such that only positive solutions can be returned. Only if coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates for the antennas
            in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and given time,
            reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and gains
            :type initp: ndarray, None

            :return: Selected fields of the optimization result, and the initial
            parameters to use for the next solve (updated only if this solve succeeded)
            :rtype: tuple (dict, ndarray or None)
            """
            res_rel, initp_new = doRelCal(
                credg, obsvis, no_unq_bls, no_ants,
                coords=coords, distribution=distribution, noise=noise,
                norm_gains=True, logamp=logamp, tilt_reg=tilt_reg,
                gphase_reg=gphase_reg, ant_pos_arr=ant_pos_arr, initp=initp,
                return_initp=True, phase_reg_initp=phase_reg_initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_new
            return res_rel, initp

        def cal_RP(credg, distribution, no_unq_bls, no_ants, logamp,
                   tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCalRP: constrained
            minimizer (by reducing the number of parameters) using polar
            coordinates

            Reads ``slct_keys`` from the enclosing scope.

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood {'gaussian', 'cauchy'}
            :type distribution: str
            :param no_unq_bls: Number of unique baselines (equivalently the number of
            redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters is taken,
            such that only positive solutions can be returned. Only if coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates for the antennas
            in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and given time,
            reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and gains
            :type initp: ndarray, None

            :return: Selected fields of the optimization result, and the initial
            parameters to use for the next solve (updated only if this solve succeeded)
            :rtype: tuple (dict, ndarray or None)
            """
            # ant_pos_arr must be the antenna positions; the boolean
            # gphase_reg flag used to be passed here by mistake
            res_rel, initp_ = doRelCalRP(
                credg, obsvis, no_unq_bls, no_ants,
                distribution=distribution, noise=noise, constr_phase=True,
                amp_constr='prod', bounded=True, logamp=logamp, tilt_reg=tilt_reg,
                gphase_reg=gphase_reg, ant_pos_arr=ant_pos_arr, initp=initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_
            return res_rel, initp

        # Bind everything that is constant across slices; the resulting
        # callable only takes (obsvis, noise, initp)
        if args.method.upper() == 'RP':
            RelCal = functools.partial(cal_RP, cRedG, args.dist, no_unq_bls, no_ants,
                                       args.logamp, args.tilt_reg, args.gphase_reg,
                                       ant_pos_arr)
            coords = 'polar'
        else:
            RelCal = functools.partial(cal, cRedG, args.dist, args.method, no_unq_bls,
                                       no_ants, args.logamp, args.tilt_reg,
                                       args.gphase_reg, ant_pos_arr)
            coords = args.method

        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            # write / append to csv file; newline='' as required by the csv
            # module so that no extra carriage returns are emitted
            with open(out_csv, 'a', newline='') as f:
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                for i, iter_dim in enumerate(iter_dims):
                    if args.initp_jd is not None:
                        # parameter columns of the matched solution: skip the
                        # leading result fields and the two trailing columns
                        initp = rel_df_c.loc[(freq_chans[iter_dim[0]], iter_dim[2])]\
                                [len(slct_keys[:-1]):-2].values.astype(float)
                    if args.noise:
                        noisec = cNData[iter_dim[:2]]
                    res_rel, initp = RelCal(cData[iter_dim[:2]], noisec, initp)
                    # expanding out the solution
                    for j, param in enumerate(res_rel['x']):
                        res_rel[j] = param
                    # reset initp after each frequency slice
                    if not (i + 1) % no_tints and args.initp_jd is None:
                        initp = None
                    del res_rel['x']
                    res_rel.update({indices[0]: freq_chans[iter_dim[0]],
                                    indices[1]: time_ints[iter_dim[1]]})
                    writer.writerow(res_rel)

        print('Relative calibration results saved to csv file {}'.format(
            out_csv))
        df = pd.read_csv(out_csv)
        if csv_exists:
            # The csv may hold slices from earlier runs; reload the data so it
            # covers every channel / integration present in the csv
            freqs = df['freq'].unique()
            tints = df['time_int'].unique()
            if cData.shape[0] != freqs.size or cData.shape[1] != tints.size:
                _, _, cData = group_data(zen_fn, args.pol, freqs, tints,
                                         bad_ants, flag_path=flag_fn)
                cData = cData.data
        df.set_index(indices, inplace=True)
        # we now append the residuals as additional columns
        df = append_residuals_rel(df, cData, cRedG, coords, out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        df.to_pickle(out_pkl)
        print('Relative calibration results dataframe pickled to {}'.format(
            out_pkl))

        # creating metadata file
        out_md = default_fn.rsplit('.', 1)[0] + '.md.pkl'
        if not os.path.exists(out_md):
            md = {'no_ants': no_ants, 'no_unq_bls': no_unq_bls, 'redg': RedG,
                  'antpos': hd.antpos, 'last': hd.lsts, 'Nfreqs': hd.Nfreqs,
                  'Ntimes': hd.Ntimes}
            with open(out_md, 'wb') as f:
                pickle.dump(md, f, protocol=pickle.HIGHEST_PROTOCOL)
            print(
                'Relative calibration metadata pickled to {}\n'.format(out_md))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
Esempio n. 3
0
def main():
    """Run across-days relative redundant calibration from the command line.

    Parses the command-line arguments, groups the visibilities of the
    requested JDs with ``XDgroup_data`` and solves every (frequency channel,
    time integration) slice with ``doRelCalD``. Each solution is appended as
    one row to a csv file; afterwards the csv is read back, residual columns
    are added with ``append_residuals_rel`` and the resulting dataframe is
    pickled (optionally compressed). A metadata pickle is written next to the
    results if it does not exist yet.

    Slices already present in an existing csv / pickle output are skipped,
    unless ``--new_df`` is given, in which case results go to a new file.
    Channels that are flagged (fully, or above ``--chan_flag_pct``) are not
    solved; if nothing is left to solve the script exits via ``sys.exit()``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Across days relative redundant calibration of visibilities

    Takes HERA visibility datasets across several JDs in uvh5 file format,
    aligns them in LAST and then performs relative redundant calibration
    (up to the overall amplitude, overall phase, and phase gradient degenerate
    parameters) for each frequency channel and each time integration in the dataset.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the relative redundant calibration for each set of frequency channel and
    time integration.
    """))
    parser.add_argument('jd_time',
                        help='Fractional JD time of dataset to \
                        align other dataframes to',
                        metavar='JD',
                        type=str)
    parser.add_argument('-j', '--jds', required=True, metavar='J',
                        type=str, help='JDs to calibrate')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None, metavar='C',
                        type=str, help='Frequency channels to calibrate \
                        {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None, metavar='T',
                        type=str, help='Time integrations to calibrate \
                        {0, 59}')
    parser.add_argument('-f', '--flag_type', required=False, default='first',
                        metavar='F', type=str, help='Flag type e.g. "first", \
                        "omni", "abs"')
    parser.add_argument('-d', '--dist', required=True, metavar='D',
                        type=str, help='Fitting distribution for calibration \
                        {"cauchy", "gaussian"}')
    parser.add_argument('-v', '--noise', required=False, action='store_true',
                        help='Use noise from autos in nlogL calculations')
    # argparse %-formats help strings, so a literal percent sign must be
    # written as '%%' - a bare '%' makes `--help` raise instead of printing
    parser.add_argument('-cf', '--chan_flag_pct', required=False, default=None,
                        metavar='CFP', type=float, help='Flag channel if more than \
                        X%% of day/time slices for a given channel are flagged')
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-u', '--out_dir', required=False, default=None, metavar='U',
                        type=str, help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new dataframe')
    parser.add_argument('-k', '--compression', required=False, default=None, metavar='K',
                        type=str, help='Compression to use when pickling results dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    zen_fn = find_zen_file(args.jd_time)
    hd = HERAData(zen_fn)

    # default_fn is also the stem of the metadata file, independent of --out
    out_fn = args.out
    default_fn = 'xd_rel_df.{}.{}.{}'.format('{:.4f}'.format(hd.lsts[0]),
                                             args.pol, args.dist)
    if out_fn is None:
        out_fn = default_fn
    if args.out_dir is not None:
        # makedirs also creates missing parent directories and does not fail
        # if the directory already exists
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)
        default_fn = os.path.join(args.out_dir, default_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if (csv_exists or pkl_exists) and args.new_df:
        # write to fresh files instead of appending to the existing ones
        out_csv = new_fn(out_csv, None, startTime)
        out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
        csv_exists = False
        pkl_exists = False

    JDs = args.jds
    if JDs == 'idr2_jds':
        JDs = numpy.asarray(idr2_jds)
    elif JDs == 'idr2_jdsx':
        JDs = numpy.asarray(idr2_jdsx)
    else:
        if '_' in JDs:
            JDs = numpy.asarray(JDs.split('_'), dtype=int)
        else:
            JDs = mod_str_arg(JDs)
        # only keep the JDs that are listed in idr2_jds
        JDs = numpy.intersect1d(JDs, idr2_jds)

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    # printable versions of the channel / time integration selections
    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(hd.Nfreqs - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(hd.Ntimes - 1)
    print('Running relative redundant calibration across JDs {} between LASTS '
          '{:.4f} and {:.4f} for polarization {}, frequency channel(s) {} '
          'and time integration(s) {}, with {} assumed noise distribution.\n'.
          format(' '.join(map(str, JDs)), hd.lsts[0], hd.lsts[-1], args.pol,
                 pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(hd.Nfreqs)
    if time_ints is None:
        time_ints = numpy.arange(hd.Ntimes)

    indices = ['freq', 'time_int']

    no_tints = len(time_ints)
    # (channel index, time integration index) pairs, time index varying fastest
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    skip_cal = False
    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        # maps from channel / time integration value to its position
        cmap_f = dict(map(reversed, enumerate(freq_chans)))
        cmap_t = dict(map(reversed, enumerate(time_ints)))
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.index.values
        # a set gives constant-time membership tests in the filter below
        done = {(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if (f in freq_chans and t in time_ints)}
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not iter_dims:
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            grp = XDgroup_data(args.jd_time, JDs, args.pol, chans=freq_chans,
                               tints=time_ints, use_flags=args.flag_type,
                               noise=args.noise)
        if not args.noise:
            _, RedG, cData = grp
            noisec = None
        else:
            _, RedG, cData, cNData = grp

        flags = cData.mask
        cData = cData.data

        # to get fields for the csv header
        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants * JDs.size + no_unq_bls) * 2

        # discarding 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        # remove flagged channels from iter_dims
        if isinstance(flags, numpy.bool_):
            # If all flags are the same
            flags = [flags]
        if True in flags:
            if args.chan_flag_pct is None:
                flg_chans = numpy.unique(
                    numpy.where(flags.all(axis=(0, 2, 3)))[0])
                print('Flagged channels across all days are: {}\n'.
                      format(freq_chans[flg_chans]))
            else:
                flg_pct = args.chan_flag_pct / 100
                flg_chans = numpy.unique(
                    numpy.where(flags.all(axis=3).mean(axis=(0, 2))
                                > flg_pct)[0])
                print('Flagged channels across all days and those that are '
                      'more than {}% flagged for their given day/time slice are: {}\n'.
                      format(args.chan_flag_pct, freq_chans[flg_chans]))
            iter_dims = [
                idim for idim in iter_dims if idim[0] not in flg_chans
            ]
            if not iter_dims:  # check if slices to solve are empty
                print('All specified channels are flagged. Exiting.')
                sys.exit()

        def cal(credg, distribution, no_unq_bls, no_ants, obsvis, noise,
                initp):
            """Relative redundant calibration across days with doRelCalD:
            default implementation with unconstrained minimizer using cartesian
            coordinates

            Returns the optimization result restricted to ``slct_keys`` and
            the initial parameters to use for the next solve: the ones
            returned by ``doRelCalD`` if the fit succeeded, otherwise the
            ``initp`` that was passed in.
            """
            res_rel, initp_new = doRelCalD(credg, obsvis, no_unq_bls, no_ants,
                distribution=distribution, noise=noise, initp=initp,
                return_initp=True, xd=True)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_new
            return res_rel, initp

        RelCal = functools.partial(cal, cRedG, args.dist, no_unq_bls, no_ants)

        with redirect_stdout(stdout):  # suppress output
            # write / append to csv file; newline='' is what the csv module
            # expects so that it controls the line endings itself
            with open(out_csv, 'a', newline='') as f:
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                prev_chan_idx = None
                for iter_dim in iter_dims:
                    # reset initp at the start of each frequency slice; keyed
                    # on the channel index rather than on a running counter so
                    # that it stays correct when already-solved slices have
                    # been removed from iter_dims
                    if iter_dim[0] != prev_chan_idx:
                        initp = None
                        prev_chan_idx = iter_dim[0]
                    if args.noise:
                        noisec = cNData[:, iter_dim[0], iter_dim[1], :]
                    res_rel, initp = RelCal(
                        cData[:, iter_dim[0], iter_dim[1], :], noisec, initp)
                    # expanding out the solution
                    for j, param in enumerate(res_rel['x']):
                        res_rel[j] = param
                    del res_rel['x']
                    res_rel.update({indices[0]: freq_chans[iter_dim[0]],
                                    indices[1]: time_ints[iter_dim[1]]})
                    writer.writerow(res_rel)

        print('Relative calibration results saved to csv file {}'.format(
            out_csv))
        df = pd.read_csv(out_csv)
        if csv_exists:
            # the csv may hold rows from earlier runs; reload the data if its
            # channel / time integration axes do not match the csv contents.
            # cData is indexed as [JD, channel, time integration, baseline]
            # above, hence axes 1 and 2.
            freqs = df['freq'].unique()
            tints = df['time_int'].unique()
            if cData.shape[1] != freqs.size or cData.shape[2] != tints.size:
                _, _, cData = XDgroup_data(args.jd_time, JDs, args.pol, chans=freqs,
                                           tints=tints, use_flags=args.flag_type,
                                           noise=None)
                cData = cData.data
        df.set_index(indices, inplace=True)
        # we now append the residuals as additional columns
        df = append_residuals_rel(df, cData, cRedG, 'cartesian', out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'.format(
                args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Relative calibration results dataframe pickled to {}'.format(
            out_pkl))

        # creating metadata file
        out_md = default_fn.rsplit('.', 1)[0] + '.md.pkl'
        if not os.path.exists(out_md):
            md = {'no_ants': no_ants, 'no_unq_bls': no_unq_bls, 'redg': RedG,
                  'antpos': hd.antpos, 'last': hd.lsts, 'Nfreqs': hd.Nfreqs,
                  'Ntimes': hd.Ntimes, 'JDs': JDs}
            with open(out_md, 'wb') as f:
                pickle.dump(md, f, protocol=pickle.HIGHEST_PROTOCOL)
            print(
                'Relative calibration metadata pickled to {}\n'.format(out_md))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
Esempio n. 4
0
def main():
    """Run absolute optimal calibration from the command line.

    Parses the command-line arguments, loads the relative calibration results
    dataframe and its metadata pickle for the given dataset, and for every
    (frequency channel, time integration) pair present in that dataframe
    solves ``doOptCal`` starting from the relatively calibrated visibility
    solutions. Each result, together with the degenerately transformed
    visibility solutions, is appended as one row to a csv file; afterwards the
    csv is read back, residual columns are added with
    ``append_residuals_opt`` and the dataframe is pickled (optionally
    compressed).

    Pairs already present in an existing csv / pickle output are skipped,
    unless ``--new_df`` is given, in which case results go to a new file.

    :raises ValueError: If none of the requested channels / time integrations
        exist in the relative calibration results dataframe
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Absolute optimal calibration of relatively calibrated visibility solutions

    Takes the relatively calibrated visibility solutions of HERA data, and
    constrains their degenerate parameters, such that average amplitude of
    antenna gains is set to 1, the average phase of antenna gains is set to 0,
    the overall phase if set to 0 and the phase gradients are 0.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the absolute optimal calibration.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to \
                        analyze', metavar='JD', type=str)
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None, metavar='C',
                        type=str, help='Frequency channels to fit {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None, metavar='T',
                        type=str, help='Time integrations to fit {0, 59}')
    parser.add_argument('-d', '--dist', required=True, metavar='D',
                        type=str, help='Fitting distribution for calibration \
                        {"cauchy", "gaussian"}')
    parser.add_argument('-a', '--ref_ant_idx', required=False, default=16, metavar='A',
                        type=int, help='Reference antenna index to set the overall \
                        phase')
    parser.add_argument('-l', '--logamp', required=False, action='store_true',
                        help='Use logamp method to force positive gain amplitudes')
    parser.add_argument('-r', '--rel_dir', required=False, default=None, metavar='R',
                        type=str, help='Directory in which relative calibration \
                        results dataframes are located')
    parser.add_argument('-u', '--out_dir', required=False, default=None, metavar='U',
                        type=str, help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new csv file')
    parser.add_argument('-k', '--compression', required=False, default=None, metavar='K',
                        type=str, help='Compression to use when pickling results dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    out_fn = args.out
    if out_fn is None:
        out_fn = 'opt_df.{}.{}.{}'.format(args.jd_time, args.pol, args.dist)
    if args.out_dir is not None:
        # makedirs also creates missing parent directories and does not fail
        # if the directory already exists
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if (csv_exists or pkl_exists) and args.new_df:
        # write to fresh files instead of appending to the existing ones
        out_csv = new_fn(out_csv, None, startTime)
        out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
        csv_exists = False
        pkl_exists = False

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    zen_fn = find_zen_file(args.jd_time)
    bad_ants = get_bad_ants(zen_fn)
    flag_fn = find_flag_file(args.jd_time, 'first') # returns None if not found

    rel_df_path = find_rel_df(args.jd_time, args.pol, args.dist, args.rel_dir)
    rel_df = pd.read_pickle(rel_df_path)

    # retrieving visibility metadata (only Nfreqs and Ntimes are needed here;
    # antenna and baseline information is taken from the grouped data below)
    md_fn = 'rel_df.{}.{}.md.pkl'.format(args.jd_time, args.pol)
    if args.rel_dir is not None:
        md_fn = os.path.join(args.rel_dir, md_fn)
    with open(md_fn, 'rb') as f:
        md = pickle.load(f)

    # printable versions of the channel / time integration selections
    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(md['Nfreqs']-1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(md['Ntimes']-1)
    print('Running absolute optimal calibration for visibility dataset {} '
          'for frequency channel(s) {} and time integration(s) {} '
          'with {} assumed noise distribution\n'.
          format(os.path.basename(zen_fn), pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(md['Nfreqs'])
    if time_ints is None:
        time_ints = numpy.arange(md['Ntimes'])

    # filter by specified channels and time integrations
    freq_flt = numpy.in1d(rel_df.index.get_level_values('freq'), freq_chans)
    tint_flt = numpy.in1d(rel_df.index.get_level_values('time_int'), time_ints)
    rel_df = rel_df[freq_flt & tint_flt]

    # only getting frequencies and time integrations that exist in the df
    freq_chans = rel_df.index.get_level_values('freq').unique().values
    time_ints = rel_df.index.get_level_values('time_int').unique().values

    indices = ['freq', 'time_int']
    no_tints = len(time_ints)
    # (channel index, time integration index) pairs, time index varying fastest
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))

    if not iter_dims:
        raise ValueError('No frequency channels or time integrations to '
            'iterate over - check that the specified --chans and --tints exist '
            'in the relative calibration results dataframes')

    skip_cal = False
    # skipping freqs and tints that are already in dataframe
    if csv_exists or pkl_exists:
        # maps from channel / time integration value to its position
        cmap_f = dict(map(reversed, enumerate(freq_chans)))
        cmap_t = dict(map(reversed, enumerate(time_ints)))
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.reset_index()[indices].values
        # a set gives constant-time membership tests in the filter below
        done = {(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if (f in freq_chans and t in time_ints)}
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not iter_dims:
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        hd, RedG, cData = group_data(zen_fn, args.pol, freq_chans, time_ints,
                                     bad_ants, flag_path=flag_fn)
        flags = cData.mask
        cData = cData.data

        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)

        # removing 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        no_deg_params = 4 # overall amplitude, overall phase, x & y phase gradients
        psize = no_ants*2 + no_deg_params + no_unq_bls*2
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        ant_pos_arr = flt_ant_pos(hd.antpos, ants)
        ant_sep = red_ant_sep(RedG, hd.antpos)

        def get_w_alpha(res_rel_vis, new_deg_params):
            """Apply degenerate parameters found from optimal absolute
            calibration to visibility solutions from relative redundant
            calibration

            Only elements 0, 2 and 3 of new_deg_params are passed on to
            degVis.

            :param res_rel_vis: Visibility solutions
            :type res_rel_vis: ndarray
            :param new_deg_params: Degenerate parameters from optimal
            calibration
            :type new_deg_params: ndarray

            :return: Degenerately transformed visibility solutions
            :rtype: ndarray
            """
            return degVis(ant_sep, res_rel_vis, *new_deg_params[[0, 2, 3]])

        stdout = io.StringIO()
        with redirect_stdout(stdout): # suppress output
            # write / append to csv file; newline='' is what the csv module
            # expects so that it controls the line endings itself
            with open(out_csv, 'a', newline='') as f:
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                prev_chan_idx = None
                for iter_dim in iter_dims:
                    # reset initp at the start of each frequency slice; keyed
                    # on the channel index rather than on a running counter so
                    # that it stays correct when already-solved slices have
                    # been removed from iter_dims
                    if iter_dim[0] != prev_chan_idx:
                        initp = None
                        prev_chan_idx = iter_dim[0]
                    # get absolute optimal calibrated solutions
                    rel_idim = (freq_chans[iter_dim[0]], time_ints[iter_dim[1]])
                    # drop the leading result fields and the two trailing
                    # columns to keep only the fitted parameters
                    res_rel_vis, _ = split_rel_results(rel_df.loc[rel_idim]\
                        [len(slct_keys)-1:-2].values.astype(float), no_unq_bls)
                    res_opt = doOptCal(cRedG, cData[iter_dim], no_ants, ant_pos_arr,
                                       ant_sep, res_rel_vis, distribution=args.dist,
                                       ref_ant_idx=args.ref_ant_idx, logamp=args.logamp,
                                       initp=initp)
                    res_opt = {key: res_opt[key] for key in slct_keys}
                    # get the new visibility solutions
                    w_alpha = get_w_alpha(res_rel_vis, res_opt['x'][-no_deg_params:])
                    w_alpha_comps = decomposeCArray(w_alpha)
                    all_params = numpy.append(res_opt['x'], w_alpha_comps)
                    # expanding out the solution
                    for j, param in enumerate(all_params):
                        res_opt[j] = param
                    # to use solution for next solve in iteration
                    if res_opt['success']:
                        initp = res_opt['x']
                    del res_opt['x']
                    res_opt.update({indices[0]: rel_idim[0],
                                    indices[1]: rel_idim[1]})
                    writer.writerow(res_opt)

        print('Absolute optimal calibration results saved to csv file {}'
              .format(out_csv))
        df = pd.read_csv(out_csv)
        df.set_index(indices, inplace=True)
        # NOTE(review): cData only covers the channels / time integrations of
        # this run, while df may hold rows written by earlier runs with a
        # different selection - confirm append_residuals_opt copes with that
        df = append_residuals_opt(df, cData, cRedG, out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'.format(args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Absolute optimal calibration results dataframe pickled to {}'
              .format(out_pkl))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
Esempio n. 5
0
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.\
    RawDescriptionHelpFormatter, description=textwrap.dedent("""
    Degenerate fitting of relatively calibrated visibility solutions

    Takes the relatively calibrated visibility solutions of HERA data, and fits
    degenerate parameters (overall amplitude, overall phase, and phase gradient)
    to two adjacent datasets in either LAST, frequency, or JD.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the degenerate fitting.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to \
                        analyze', metavar='JD', type=str)
    parser.add_argument('-o', '--out', required=False, default=None, \
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str, \
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-x', '--deg_dim', required=True, metavar='X', type=str, \
                        help='Which dimension to compare relatively calibrated \
                        visibility solutions {"tint", "freq", "jd"}')
    parser.add_argument('-m', '--coords', required=False, default='cartesian', \
                        metavar='M', type=str, help='Coordinates used for rel cal \
                        results - {"cartesian", "polar"}')
    parser.add_argument('-c', '--chans', required=False, default=None, metavar='C', \
                        type=str, help='Frequency channels to fit {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None, metavar='T', \
                        type=str, help='Time integrations to fit {0, 59}')
    parser.add_argument('-d', '--dist', required=True, default='cauchy', metavar='D', \
                        type=str, help='Fitting distribution for calibration \
                        {"cauchy", "gaussian"}')
    parser.add_argument('-j', '--tgt_jd', required=False, default=None, metavar='J', \
                        type=int, help='JD day for fitting across JDs - only if \
                        deg_dim = "jd". Default to pick consecutive JD day')
    parser.add_argument('-r', '--rel_dir', required=False, default=None, metavar='R', \
                        type=str, help='Directory in which relative calibration \
                        results dataframes are located')
    parser.add_argument('-u', '--out_dir', required=False, default=None, metavar='U', \
                        type=str, help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true', \
                        help='Write data to a new csv file')
    parser.add_argument('-k', '--compression', required=False, default=None, metavar='K', \
                        type=str, help='Compression to use when pickling results dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    pjd = ''
    if args.deg_dim == 'jd':
        tgt_jd = args.tgt_jd
        if tgt_jd is None:
            # choose consecutive JD as default
            tgt_jd = int(float(args.jd_time)) + 1
        pjd = '.' + str(tgt_jd)

    out_fn = args.out
    if out_fn is None:
        out_fn = 'deg_df.{}.{}.{}{}.{}'.format(args.jd_time, args.pol, \
                                               args.deg_dim, pjd, args.dist)
    if args.out_dir is not None:
        if not os.path.exists(args.out_dir):
            os.mkdir(args.out_dir)
        out_fn = os.path.join(args.out_dir, out_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if csv_exists or pkl_exists:
        if args.new_df:
            out_csv = new_fn(out_csv, None, startTime)
            out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
            csv_exists = False
            pkl_exists = False

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    rel_df_path = find_rel_df(args.jd_time, args.pol, args.dist, args.rel_dir)
    rel_df = pd.read_pickle(rel_df_path)

    # retrieving visibility metadata
    md_fn = 'rel_df.{}.{}.md.pkl'.format(args.jd_time, args.pol)
    if args.rel_dir is not None:
        md_fn = os.path.join(args.rel_dir, md_fn)
    with open(md_fn, 'rb') as f:
        md = pickle.load(f)
    ant_pos = md['antpos']
    no_unq_bls = md['no_unq_bls']
    RedG = md['redg']
    ant_sep = red_ant_sep(RedG, ant_pos)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(md['Nfreqs']-1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(md['Ntimes']-1)
    pdict = {'freq':'frequency channels', 'tint':'time integrations', \
             'jd':'Julian days'}
    print('Running degenerate translation on adjacent {} for visibility dataset {} '\
          'for frequency channel(s) {} and time integration(s) {} with {} '\
          'assumed noise distribution\n'.format(pdict[args.deg_dim], \
          os.path.basename(find_zen_file(args.jd_time)), pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(md['Nfreqs'])
    if time_ints is None:
        time_ints = numpy.arange(md['Ntimes'])

    # filter by specified channels and time integrations
    freq_flt = numpy.in1d(rel_df.index.get_level_values('freq'), freq_chans)
    tint_flt = numpy.in1d(rel_df.index.get_level_values('time_int'), time_ints)
    rel_df = rel_df[freq_flt & tint_flt]

    # only getting frequencies and time integrations that exist in the df
    freq_chans = rel_df.index.get_level_values('freq').unique().values
    time_ints = rel_df.index.get_level_values('time_int').unique().values

    if args.deg_dim == 'freq':
        indices = ['freq1', 'freq2', 'time_int']
        # getting adjacent frequency channel pairs
        iter_dims = [idim for idim in zip(freq_chans, freq_chans[1:]) if \
                     idim[1] - idim[0] == 1]
        iter_dims = [idim+(time_int,) for idim in iter_dims for time_int in \
                     time_ints] # adding time integrations
        a, b, c, d = 0, 1, 2, 2 # for iteration indexing

    if args.deg_dim == 'tint':
        indices = ['time_int1', 'time_int2', 'freq']
        # getting adjacent LAST (time integration) pairs
        iter_dims = [idim for idim in zip(time_ints, time_ints[1:]) if \
                     idim[1] - idim[0] == 1]
        iter_dims = [idim+(freq_chan,) for idim in iter_dims for freq_chan in \
                     freq_chans] # adding frequency channels
        a, b, c, d = 2, 2, 0, 1 # for iteration indexing

    if args.deg_dim == 'jd':
        indices = ['time_int1', 'time_int2', 'freq']
        # find dataset from specified JD that contains visibilities at the same LAST
        jd_time2 = match_lst(args.jd_time, tgt_jd)
        if len(str(jd_time2)) < 13:
            jd_time2 = str(jd_time2) + '0' # add a trailing 0 that is omitted in float
        rel_df_path2 = find_rel_df(jd_time2, args.pol, args.dist, args.rel_dir)
        if isinstance(jd_time2, str):
            jd_time2 = float(jd_time2)
        # aligning datasets in LAST
        last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
        last1 = last_df[last_df['JD_time'] == float(args.jd_time)]['LASTs'].values[0]
        last2 = last_df[last_df['JD_time'] == jd_time2]['LASTs'].values[0]
        _, offset = find_nearest(last2, last1[0])

        rel_df2 = pd.read_pickle(rel_df_path2)
        rel_df2 = rel_df2[rel_df2.index.get_level_values('time_int') >= offset]

        next_row = numpy.where(last_df['JD_time'] == jd_time2)[0][0] + 1
        rel_df_path3 = find_rel_df(last_df.iloc[next_row]['JD_time'], args.pol, \
                                   args.dist, args.rel_dir)
        rel_df3 = pd.read_pickle(rel_df_path3)
        rel_df3 = rel_df3[rel_df3.index.get_level_values('time_int') < offset]

        # combined results dataframes that is now alinged in LAST by row number
        # with rel_df:
        rel_df_c = pd.concat([rel_df2, rel_df3])
        # pairing time_ints from rel_df and rel_df_c that match in LAST
        # time_ints2 = rel_df_c.index.get_level_values('time_int').unique().values
        time_ints2 = numpy.arange(offset, offset + md['Ntimes']) % md['Ntimes']
        iter_dims = [idim for idim in zip(time_ints, time_ints2[time_ints])]
        iter_dims = [idim+(freq_chan,) for idim in iter_dims for freq_chan in \
                     freq_chans]
        iter_dims = sorted(iter_dims, key=lambda row: row[2]) # iterate across
        # LAST first - should speed up fitting
        a, b, c, d = 2, 2, 0, 1 # for iteration indexing
    else:
        rel_df_c = rel_df

    if not iter_dims:
        raise ValueError('No frequency channels or time integrations to '\
                         'iterate over - check that the specified --chans '\
                         'and --tints exist in the relative calibration '\
                         'results dataframes')

    skip_cal = False
    # skipping freqs and tints that are already in dataframe
    if csv_exists or pkl_exists:
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.reset_index()[indices].values
        iter_dims = [idim for idim in iter_dims if not \
            numpy.equal(idx_arr, numpy.asarray(idim)).all(1).any()]
        if not any(iter_dims):
            print('Solutions to all specified frequency channels and time '\
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        # removing 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        no_deg_params = 3 # overall amplitude, x phase gradient, y phase gradient
        header = slct_keys[:-1] + list(numpy.arange(no_deg_params)) + indices

        stdout = io.StringIO()
        with redirect_stdout(stdout): # suppress output
            with open(out_csv, 'a') as f: # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                for iter_dim in iter_dims:
                    # get relatively calibrated solutions
                    resx1 = rel_df.loc[iter_dim[a], iter_dim[c]][len(slct_keys)-1:-2]\
                    .values.astype(float)
                    resx2 = rel_df_c.loc[iter_dim[b], iter_dim[d]][len(slct_keys)-1:-2]\
                    .values.astype(float)
                    rel_vis1, _ = split_rel_results(resx1, no_unq_bls, coords=args.coords)
                    rel_vis2, _ = split_rel_results(resx2, no_unq_bls, coords=args.coords)

                    res_deg = doDegVisVis(ant_sep, rel_vis1, rel_vis2, \
                                          distribution=args.dist, initp=initp)
                    res_deg = {key:res_deg[key] for key in slct_keys}
                    # expanding out the solution
                    for i, param in enumerate(res_deg['x']):
                        res_deg[i] = param
                    # to use solution for next solve in iteration
                    # if res_deg['success']:
                    #     initp = res_deg['x']
                    del res_deg['x']
                    res_deg.update({indices[i]:iter_dim[i] for i in \
                                    range(no_deg_params)})
                    writer.writerow(res_deg)

        print('Degenerate fitting results saved to csv file {}'.format(out_csv))
        df = pd.read_csv(out_csv)
        df_indices = indices.copy()
        mv_col = df_indices.pop(1)
        df.set_index(df_indices, inplace=True)
        cols = list(df.columns.values)
        cols.remove(mv_col)
        df = df[[mv_col]+cols]
        # we now append the residuals as additional columns
        df = append_residuals_deg(df, rel_df, rel_df_c, md, out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'.format(args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Degenerate fitting results dataframe pickled to {}'.format(out_pkl))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))