def calc_deg_residuals(row):
            """Compute the residuals of a degenerate fit for one results row.

            The two sets of relative calibration solutions that were compared
            in the fit are looked up in ``rel_df1`` and ``rel_df2`` (taken
            from the enclosing scope). The visibilities of the first set are
            passed through ``degVis`` together with the last three entries of
            ``row`` and then compared with the visibilities of the second set.

            :param row: Row of the degenerate fitting results dataframe
            :type row: Series

            :return: Residual and normalized residual, in that order
            :rtype: Series
            """
            # Solution parameters live in the digit-named columns; the slice
            # below starts two columns before the count of the other columns
            # and drops the last two columns.
            # NOTE(review): presumably the trailing two columns are residual
            # columns appended earlier - confirm against the caller.
            label_cols = [name for name in rel_df1.columns.values
                          if not name.isdigit()]
            first_param = len(label_cols) - 2

            if 'time_int' not in deg_cols:
                # row holds one frequency and a pair of time integrations
                freq_1 = freq_2 = row['freq']
                tint_1, tint_2 = row['time_int1'], row['time_int2']
            elif 'freq' not in deg_cols:
                # row holds a pair of frequencies and one time integration
                freq_1, freq_2 = row['freq1'], row['freq2']
                tint_1 = tint_2 = row['time_int']

            sol_1 = rel_df1.loc[freq_1, tint_1][first_param:-2]
            sol_1 = sol_1.values.astype(float)
            sol_2 = rel_df2.loc[freq_2, tint_2][first_param:-2]
            sol_2 = sol_2.values.astype(float)

            n_unq_bls = md['no_unq_bls']
            vis_1 = split_rel_results(sol_1, n_unq_bls)[0]
            vis_2 = split_rel_results(sol_2, n_unq_bls)[0]

            fitted_params = row[-3:].values.astype(float)
            transformed_vis = degVis(ant_sep, vis_1, *fitted_params)

            residuals = vis_2 - transformed_vis
            normalized = norm_residuals(vis_2, transformed_vis)
            return pd.Series([residuals, normalized])
        def calc_rel_residuals(row):
            """Compute the residuals of a relative calibration solution row.

            The solved visibilities and gains stored in ``row`` are turned
            back into predicted visibilities (``gVis``, or ``XDgVis`` when
            ``xd`` is set) and compared with the observed visibilities taken
            from ``cdata`` at the row's frequency and time integration.

            :param row: Row of the relative calibration results dataframe
            :type row: Series

            :return: Residual and normalized residual, in that order
            :rtype: Series
            """
            # solution parameters are stored in the digit-named columns,
            # which follow the other columns
            label_cols = [name for name in rel_df.columns.values
                          if not name.isdigit()]
            first_param = len(label_cols)

            # position of each frequency / time integration along cdata axes
            freq_pos = {chan: pos for pos, chan in enumerate(freqs)}
            tint_pos = {tint: pos for pos, tint in enumerate(tints)}

            params = row.values[first_param:].astype(float)
            vis_sol, gain_sol = split_rel_results(params, no_unq_bls, coords)

            if not xd:
                gvisc = gVis
                obs_vis = cdata[freq_pos[row['freq']],
                                tint_pos[row['time_int']], :]
            else:
                gvisc = XDgVis
                n_sets = cdata.shape[0]
                # one block of gains per leading-axis entry of cdata, with the
                # same visibility solution repeated for each of them
                gain_sol = gain_sol.reshape((n_sets, -1))
                vis_sol = numpy.tile(vis_sol, n_sets).reshape((n_sets, -1))
                obs_vis = cdata[:, freq_pos[row['freq']],
                                tint_pos[row['time_int']], :]

            predicted = gvisc(vis_sol, credg, gain_sol)
            residuals = obs_vis - predicted
            normalized = norm_residuals(obs_vis, predicted)
            return pd.Series([residuals, normalized])
Beispiel #3
0
def main():
    """Spot-check relative redundant calibration results from the command line.

    Parses the command-line arguments, re-runs relative calibration (via
    ``doRelCal``) on a random sample of (frequency, time integration) slices
    of the given results dataframe, and records for each slice whether the
    negative log-likelihood and the gain amplitudes agree with the stored
    results to within ``--tol``. The checks are written to a csv file, pickled
    as a dataframe and summarised on stdout. If the pickled check dataframe
    already exists and ``--overwrite`` is not given, no calibration is run and
    only the summary of the existing results is printed.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.\
    RawDescriptionHelpFormatter, description=textwrap.dedent("""
    Check the relative redundant calibration of visibilities

    The relative calibration script rel_cal.py reuses the previous solution to
    initialize the next solver. While this greatly speeds up the code, there is
    the worry that the solver gets stuck in a local minimum. This script takes a
    random sample of frequency and time integration slices from a dataframe of
    results from relative calibration, and re-runs relative calibration on them
    with vanilla initial parameter guesses of 1+1j for the gains and 0+0j for
    the visibilities, and verifies if these results match with the original
    results by checking both their negative log-likelhoods and their gain
    amplitudes. We expect these to match, but we do not expect the gain and
    visibility solution phases to be equal, since there are still some additional
    degeneracies (overall phase & tilt shifts) that have not been accounted for.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the relative redundant calibration for each set of randomly chosen frequency
    channel and time integration slices chosen from the rel_df results dataframe
    """))
    parser.add_argument('rel_df',
                        help='Relative calibration results dataframe \
                        in pickle file format',
                        metavar='df',
                        type=str)
    parser.add_argument('-o', '--out', required=False, default=None, \
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-c', '--no_checks', required=False, default=50, \
                        metavar='C', type=int, help='Number of checks')
    parser.add_argument('-t', '--tol', required=False, default=0.01, \
                        metavar='T', type=float, help='Tolerance for the \
                        negative log-likelihood of relative calibration results \
                        to match')
    parser.add_argument('-w', '--overwrite', required=False, action='store_true', \
                        help='Overwrite existing check csv and dataframe')
    parser.add_argument('-r', '--rel_dir', required=False, default=None, metavar='R', \
                        type=str, help='Directory in which rel_dfs are stored')
    parser.add_argument('-u', '--out_dir', required=False, default=None, metavar='U', \
                        type=str, help='Out directory to store dataframe')
    parser.add_argument('-k', '--keep_csv', required=False, action='store_true', \
                        help='Keep csv file')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    # Only the file name is parsed, so that a directory prefix containing dots
    # cannot shift the fields. Fields 1 and 2 are joined into the JD time,
    # field 3 is used as the polarization and field 4 as the distribution.
    sout = os.path.basename(args.rel_df).split('.')
    jd_time = '{}.{}'.format(sout[1], sout[2])
    pol = sout[3]
    dist = sout[4]
    no_checks = args.no_checks

    out_fn = args.out
    if out_fn is None:
        out_fn = 'check_rel_df.{}.{}.{}'.format(jd_time, pol, dist)
    if args.out_dir is not None:
        # also creates missing parent directories
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)

    out_csv = fn_format(out_fn, 'csv')
    csv_exists = os.path.exists(out_csv)
    if csv_exists and args.overwrite:
        os.remove(out_csv)
        csv_exists = False

    out_df = out_csv.rsplit('.', 1)[0] + '.pkl'
    df_exists = os.path.exists(out_df)
    if df_exists and args.overwrite:
        os.remove(out_df)
        df_exists = False

    match_keys = ['loglkl_match', 'gamp_match']
    if not df_exists:
        zen_fn = find_zen_file(jd_time)
        bad_ants = get_bad_ants(zen_fn)

        print('Checking the relative redundant calibration results for {}\n'
              .format(args.rel_df))
        if args.rel_dir is not None:
            rel_dir_path = os.path.join(args.rel_dir, args.rel_df)
        else:
            rel_dir_path = args.rel_df
        rel_df = pd.read_pickle(rel_dir_path)

        # random sample (without replacement) of dataframe index entries
        no_checks = min(no_checks, len(rel_df.index))
        rnd_idxs = numpy.random.choice(rel_df.index.values, no_checks,
                                       replace=False)
        # only the sampled channels are loaded; fmap maps a channel to its
        # position along the first axis of the loaded data
        rnd_chans = numpy.unique([rnd_idx[0] for rnd_idx in rnd_idxs])
        fmap = {chan: pos for pos, chan in enumerate(rnd_chans)}

        hd, RedG, cData = group_data(zen_fn, pol, rnd_chans, None, bad_ants)
        cData = cData.data

        # to get fields for the csv header
        no_ants = numpy.unique(RedG[:, 1:]).size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants + no_unq_bls) * 2

        indices = ['freq', 'time_int']
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        # number of columns that precede the solution parameters in rel_df
        no_meta = len(slct_keys) - 1
        header = slct_keys[:-1] + match_keys + list(
            numpy.arange(psize)) + indices

        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            # newline='' is what the csv module expects of its file objects
            with open(out_csv, 'a', newline='') as f:  # write / append to csv
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    # a second header inside an existing csv would be read
                    # back as a data row
                    writer.writeheader()
                for iter_dim in rnd_idxs:
                    res_rel = doRelCal(cRedG,
                                       cData[fmap[iter_dim[0]], iter_dim[1]],
                                       no_unq_bls, no_ants, coords='cartesian',
                                       distribution=dist, norm_gains=True)
                    res_rel = {key: res_rel[key] for key in slct_keys}

                    # checking results against the stored solution
                    orig_row = rel_df.loc[iter_dim]
                    res_rel[match_keys[0]] = numpy.abs(norm_residuals(
                        orig_row['fun'], res_rel['fun'])) < args.tol
                    res_gamp = numpy.abs(split_rel_results(
                        orig_row[no_meta:-2].values.astype(float),
                        no_unq_bls)[1])
                    check_gamp = numpy.abs(split_rel_results(
                        res_rel['x'], no_unq_bls)[1])
                    res_rel[match_keys[1]] = (numpy.abs(norm_residuals(
                        res_gamp, check_gamp)) < args.tol).all()

                    # expanding out the solution
                    for i, param in enumerate(res_rel['x']):
                        res_rel[i] = param
                    del res_rel['x']
                    res_rel.update({indices[0]: iter_dim[0],
                                    indices[1]: iter_dim[1]})
                    writer.writerow(res_rel)

        df = pd.read_csv(out_csv)
        df.set_index(indices, inplace=True)
        df.sort_values(by=indices, inplace=True)
        df.to_pickle(out_df)
        # the csv may also hold rows from an earlier run that was appended to
        no_checks = len(df.index)

        if not args.keep_csv:
            os.remove(out_csv)
        else:
            print('Checked relative calibration results saved to csv file {}'
                  .format(out_csv))

        print('Checked relative calibration results dataframe pickled to {}\n'
              .format(out_df))

    else:
        df = pd.read_pickle(out_df)
        print('Checked relative calibration results already exists in {} - '
              'specify --overwrite as an argument to perform check again.\n'
              .format(out_df))
        no_checks = len(df.index)

    # An iteration matches only if both the log-likelihood and the gain
    # amplitude checks pass; the same criterion is used for the "all match"
    # test, the percentage and the list of mismatches.
    matches = df[match_keys].values.astype(bool)
    row_match = matches.all(axis=1)
    all_match = row_match.all()
    if all_match:
        pmatch = 'All'
    else:
        pmatch = '{}% of'.format(
            round(100 * numpy.sum(row_match) / matches.shape[0], 2))
    print('{} iterations from the {} randomly selected frequency and time '
          'slices match the original results at a tolerance of {}%.\n'
          .format(pmatch, no_checks, args.tol*100))
    if not all_match:
        print('Mismatched iterations are {}\n'.format(
            df[~row_match].index.values))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
Beispiel #4
0
def main():
    """Run absolute optimal calibration from the command line.

    Parses the command-line arguments, loads the relative calibration results
    dataframe and its metadata pickle, and for every requested (frequency,
    time integration) slice that is present in the dataframe runs ``doOptCal``.
    Each solution, together with the visibility solutions transformed by the
    fitted degenerate parameters, is appended to a csv file; the csv is then
    read back, residuals are appended and the dataframe is pickled. Slices
    already present in an existing csv (or, failing that, pickle) are skipped
    unless ``--new_df`` is given, in which case new output files are used.

    :raises ValueError: If none of the requested channels and time
        integrations exist in the relative calibration results dataframe
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.\
    RawDescriptionHelpFormatter, description=textwrap.dedent("""
    Absolute optimal calibration of relatively calibrated visibility solutions

    Takes the relatively calibrated visibility solutions of HERA data, and
    constrains their degenerate parameters, such that average amplitude of
    antenna gains is set to 1, the average phase of antenna gains is set to 0,
    the overall phase if set to 0 and the phase gradients are 0.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the absolute optimal calibration.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to \
                        analyze', metavar='JD', type=str)
    parser.add_argument('-o', '--out', required=False, default=None, \
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str, \
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None, metavar='C', \
                        type=str, help='Frequency channels to fit {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None, metavar='T', \
                        type=str, help='Time integrations to fit {0, 59}')
    parser.add_argument('-d', '--dist', required=True, metavar='D', \
                        type=str, help='Fitting distribution for calibration \
                        {"cauchy", "gaussian"}')
    parser.add_argument('-a', '--ref_ant_idx', required=False, default=16, metavar='A', \
                        type=int, help='Reference antenna index to set the overall \
                        phase')
    parser.add_argument('-l', '--logamp', required=False, action='store_true', \
                        help='Use logamp method to force positive gain amplitudes')
    parser.add_argument('-r', '--rel_dir', required=False, default=None, metavar='R', \
                        type=str, help='Directory in which relative calibration \
                        results dataframes are located')
    parser.add_argument('-u', '--out_dir', required=False, default=None, metavar='U', \
                        type=str, help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true', \
                        help='Write data to a new csv file')
    parser.add_argument('-k', '--compression', required=False, default=None, metavar='K', \
                        type=str, help='Compression to use when pickling results dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    out_fn = args.out
    if out_fn is None:
        out_fn = 'opt_df.{}.{}.{}'.format(args.jd_time, args.pol, args.dist)
    if args.out_dir is not None:
        # also creates missing parent directories
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if (csv_exists or pkl_exists) and args.new_df:
        # write to fresh files instead of resuming from the existing ones
        out_csv = new_fn(out_csv, None, startTime)
        out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
        csv_exists = False
        pkl_exists = False

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    zen_fn = find_zen_file(args.jd_time)
    bad_ants = get_bad_ants(zen_fn)
    flag_fn = find_flag_file(args.jd_time, 'first') # returns None if not found

    rel_df_path = find_rel_df(args.jd_time, args.pol, args.dist, args.rel_dir)
    rel_df = pd.read_pickle(rel_df_path)

    # retrieving visibility metadata
    md_fn = 'rel_df.{}.{}.md.pkl'.format(args.jd_time, args.pol)
    if args.rel_dir is not None:
        md_fn = os.path.join(args.rel_dir, md_fn)
    with open(md_fn, 'rb') as f:
        md = pickle.load(f)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(md['Nfreqs']-1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(md['Ntimes']-1)
    print('Running absolute optimal calibration for visibility dataset {} '
          'for frequency channel(s) {} and time integration(s) {} '
          'with {} assumed noise distribution\n'
          .format(os.path.basename(zen_fn), pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(md['Nfreqs'])
    if time_ints is None:
        time_ints = numpy.arange(md['Ntimes'])

    # filter by specified channels and time integrations
    freq_flt = numpy.in1d(rel_df.index.get_level_values('freq'), freq_chans)
    tint_flt = numpy.in1d(rel_df.index.get_level_values('time_int'), time_ints)
    rel_df = rel_df[freq_flt & tint_flt]

    # only getting frequencies and time integrations that exist in the df
    freq_chans = rel_df.index.get_level_values('freq').unique().values
    time_ints = rel_df.index.get_level_values('time_int').unique().values

    indices = ['freq', 'time_int']
    # iteration is over positions into freq_chans / time_ints (frequency is
    # the slow axis), which are also the positions into the loaded data
    iter_dims = list(numpy.ndindex((len(freq_chans), len(time_ints))))

    if not iter_dims:
        raise ValueError('No frequency channels or time integrations to '\
            'iterate over - check that the specified --chans and --tints exist '\
            'in the relative calibration results dataframes')

    skip_cal = False
    # skipping freqs and tints that are already in dataframe
    if csv_exists or pkl_exists:
        cmap_f = {chan: pos for pos, chan in enumerate(freq_chans)}
        cmap_t = {tint: pos for pos, tint in enumerate(time_ints)}
        if csv_exists:
            idx_arr = pd.read_csv(out_csv, usecols=indices).values
        else:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.reset_index()[indices].values
        # a set of position pairs gives constant-time "already solved" checks
        done = {(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if f in cmap_f and t in cmap_t}
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not iter_dims:
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        hd, RedG, cData = group_data(zen_fn, args.pol, freq_chans, time_ints, \
                                     bad_ants, flag_path=flag_fn)
        cData = cData.data

        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)

        # removing 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        # number of columns that precede the solution parameters in rel_df
        no_meta = len(slct_keys) - 1
        no_deg_params = 4 # overall amplitude, overall phase, x & y phase gradients
        psize = no_ants*2 + no_deg_params + no_unq_bls*2
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        ant_pos_arr = flt_ant_pos(hd.antpos, ants)
        ant_sep = red_ant_sep(RedG, hd.antpos)

        def get_w_alpha(res_rel_vis, new_deg_params):
            """Apply degenerate parameters found from optimal absolute
            calibration to visibility solutions from relative redundant
            calibration

            :param res_rel_vis: Visibility solutions
            :type res_rel_vis: ndarray
            :param new_deg_params: Degenerate parameters from absolute
                optimal calibration
            :type new_deg_params: ndarray

            :return: Degenerately transformed visibility solutions
            :rtype: ndarray
            """
            # entry 1 (the overall phase) is not passed on to degVis
            return degVis(ant_sep, res_rel_vis, *new_deg_params[[0, 2, 3]])

        stdout = io.StringIO()
        with redirect_stdout(stdout): # suppress output
            # newline='' is what the csv module expects of its file objects
            with open(out_csv, 'a', newline='') as f: # write / append to csv
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                prev_freq_pos = None
                for iter_dim in iter_dims:
                    # Reset initp at the start of every frequency slice. This
                    # is keyed on the frequency itself rather than on a loop
                    # counter, so it stays correct when already-solved slices
                    # have been removed from iter_dims.
                    if iter_dim[0] != prev_freq_pos:
                        initp = None
                        prev_freq_pos = iter_dim[0]
                    # get absolute optimal calibrated solutions
                    rel_idim = (freq_chans[iter_dim[0]], time_ints[iter_dim[1]])
                    res_rel_vis, _ = split_rel_results(
                        rel_df.loc[rel_idim][no_meta:-2].values.astype(float),
                        no_unq_bls)
                    res_opt = doOptCal(cRedG, cData[iter_dim], no_ants, ant_pos_arr, \
                                       ant_sep, res_rel_vis, distribution=args.dist, \
                                       ref_ant_idx=args.ref_ant_idx, logamp=args.logamp, \
                                       initp=initp)
                    res_opt = {key: res_opt[key] for key in slct_keys}
                    # get the new visibility solutions
                    w_alpha = get_w_alpha(res_rel_vis, res_opt['x'][-no_deg_params:])
                    w_alpha_comps = decomposeCArray(w_alpha)
                    all_params = numpy.append(res_opt['x'], w_alpha_comps)
                    # expanding out the solution
                    for j, param in enumerate(all_params):
                        res_opt[j] = param
                    # to use solution for next solve in iteration
                    if res_opt['success']:
                        initp = res_opt['x']
                    del res_opt['x']
                    res_opt.update({indices[0]: rel_idim[0],
                                    indices[1]: rel_idim[1]})
                    writer.writerow(res_opt)

        print('Absolute optimal calibration results saved to csv file {}'
              .format(out_csv))
        df = pd.read_csv(out_csv)
        df.set_index(indices, inplace=True)
        df = append_residuals_opt(df, cData, cRedG, out_fn=None)
        if pkl_exists and not csv_exists:
            # the csv only holds the new solutions; merge in the pickled ones
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'.format(args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Absolute optimal calibration results dataframe pickled to {}'
              .format(out_pkl))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
Beispiel #5
0
def main():
    """Run degenerate fitting of relative calibration solutions from the CLI.

    Parses the command-line arguments, loads the relative calibration results
    dataframe(s) and the metadata pickle, pairs up slices that are adjacent
    in frequency or time integration (or matched across Julian days), and
    runs ``doDegVisVis`` on the visibility solutions of each pair. Results are
    appended to a csv file; the csv is then read back, residuals are appended
    and the dataframe is pickled. Pairs already present in an existing csv
    (or, failing that, pickle) are skipped unless ``--new_df`` is given, in
    which case new output files are used.

    :raises ValueError: If there are no pairs to iterate over for the
        requested channels and time integrations
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.\
    RawDescriptionHelpFormatter, description=textwrap.dedent("""
    Degenerate fitting of relatively calibrated visibility solutions

    Takes the relatively calibrated visibility solutions of HERA data, and fits
    degenerate parameters (overall amplitude, overall phase, and phase gradient)
    to two adjacent datasets in either LAST, frequency, or JD.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the degenerate fitting.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to \
                        analyze', metavar='JD', type=str)
    parser.add_argument('-o', '--out', required=False, default=None, \
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str, \
                        help='Polarization {"ee", "en", "nn", "ne"}')
    # choices makes argparse reject an unsupported dimension up front
    parser.add_argument('-x', '--deg_dim', required=True, metavar='X', type=str, \
                        choices=('tint', 'freq', 'jd'), \
                        help='Which dimension to compare relatively calibrated \
                        visibility solutions {"tint", "freq", "jd"}')
    parser.add_argument('-m', '--coords', required=False, default='cartesian', \
                        metavar='M', type=str, help='Coordinates used for rel cal \
                        results - {"cartesian", "polar"}')
    parser.add_argument('-c', '--chans', required=False, default=None, metavar='C', \
                        type=str, help='Frequency channels to fit {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None, metavar='T', \
                        type=str, help='Time integrations to fit {0, 59}')
    # NOTE(review): required=True makes the 'cauchy' default unreachable
    parser.add_argument('-d', '--dist', required=True, default='cauchy', metavar='D', \
                        type=str, help='Fitting distribution for calibration \
                        {"cauchy", "gaussian"}')
    parser.add_argument('-j', '--tgt_jd', required=False, default=None, metavar='J', \
                        type=int, help='JD day for fitting across JDs - only if \
                        deg_dim = "jd". Default to pick consecutive JD day')
    parser.add_argument('-r', '--rel_dir', required=False, default=None, metavar='R', \
                        type=str, help='Directory in which relative calibration \
                        results dataframes are located')
    parser.add_argument('-u', '--out_dir', required=False, default=None, metavar='U', \
                        type=str, help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true', \
                        help='Write data to a new csv file')
    parser.add_argument('-k', '--compression', required=False, default=None, metavar='K', \
                        type=str, help='Compression to use when pickling results dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    pjd = ''
    if args.deg_dim == 'jd':
        tgt_jd = args.tgt_jd
        if tgt_jd is None:
            # choose consecutive JD as default
            tgt_jd = int(float(args.jd_time)) + 1
        pjd = '.' + str(tgt_jd)

    out_fn = args.out
    if out_fn is None:
        out_fn = 'deg_df.{}.{}.{}{}.{}'.format(args.jd_time, args.pol, \
                                               args.deg_dim, pjd, args.dist)
    if args.out_dir is not None:
        # also creates missing parent directories
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if (csv_exists or pkl_exists) and args.new_df:
        # write to fresh files instead of resuming from the existing ones
        out_csv = new_fn(out_csv, None, startTime)
        out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
        csv_exists = False
        pkl_exists = False

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    rel_df_path = find_rel_df(args.jd_time, args.pol, args.dist, args.rel_dir)
    rel_df = pd.read_pickle(rel_df_path)

    # retrieving visibility metadata
    md_fn = 'rel_df.{}.{}.md.pkl'.format(args.jd_time, args.pol)
    if args.rel_dir is not None:
        md_fn = os.path.join(args.rel_dir, md_fn)
    with open(md_fn, 'rb') as f:
        md = pickle.load(f)
    ant_pos = md['antpos']
    no_unq_bls = md['no_unq_bls']
    RedG = md['redg']
    ant_sep = red_ant_sep(RedG, ant_pos)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(md['Nfreqs']-1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(md['Ntimes']-1)
    pdict = {'freq':'frequency channels', 'tint':'time integrations', \
             'jd':'Julian days'}
    print('Running degenerate translation on adjacent {} for visibility dataset {} '
          'for frequency channel(s) {} and time integration(s) {} with {} '
          'assumed noise distribution\n'.format(
              pdict[args.deg_dim],
              os.path.basename(find_zen_file(args.jd_time)),
              pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(md['Nfreqs'])
    if time_ints is None:
        time_ints = numpy.arange(md['Ntimes'])

    # filter by specified channels and time integrations
    freq_flt = numpy.in1d(rel_df.index.get_level_values('freq'), freq_chans)
    tint_flt = numpy.in1d(rel_df.index.get_level_values('time_int'), time_ints)
    rel_df = rel_df[freq_flt & tint_flt]

    # only getting frequencies and time integrations that exist in the df
    freq_chans = rel_df.index.get_level_values('freq').unique().values
    time_ints = rel_df.index.get_level_values('time_int').unique().values

    # Dataframe holding the second solution of every pair; it is rel_df
    # itself unless the fit is across Julian days.
    rel_df_c = rel_df

    # Each iter_dims entry is ordered like `indices`. a, c are the positions
    # in that entry of the (freq, time_int) used to look up the first
    # solution, and b, d those of the second solution.
    if args.deg_dim == 'freq':
        indices = ['freq1', 'freq2', 'time_int']
        # getting adjacent frequency channel pairs
        iter_dims = [idim for idim in zip(freq_chans, freq_chans[1:]) if \
                     idim[1] - idim[0] == 1]
        iter_dims = [idim+(time_int,) for idim in iter_dims for time_int in \
                     time_ints] # adding time integrations
        a, b, c, d = 0, 1, 2, 2 # for iteration indexing

    elif args.deg_dim == 'tint':
        indices = ['time_int1', 'time_int2', 'freq']
        # getting adjacent LAST (time integration) pairs
        iter_dims = [idim for idim in zip(time_ints, time_ints[1:]) if \
                     idim[1] - idim[0] == 1]
        iter_dims = [idim+(freq_chan,) for idim in iter_dims for freq_chan in \
                     freq_chans] # adding frequency channels
        a, b, c, d = 2, 2, 0, 1 # for iteration indexing

    elif args.deg_dim == 'jd':
        indices = ['time_int1', 'time_int2', 'freq']
        # find dataset from specified JD that contains visibilities at the same LAST
        jd_time2 = match_lst(args.jd_time, tgt_jd)
        if len(str(jd_time2)) < 13:
            jd_time2 = str(jd_time2) + '0' # add a trailing 0 that is omitted in float
        rel_df_path2 = find_rel_df(jd_time2, args.pol, args.dist, args.rel_dir)
        if isinstance(jd_time2, str):
            jd_time2 = float(jd_time2)
        # aligning datasets in LAST
        last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
        last1 = last_df[last_df['JD_time'] == float(args.jd_time)]['LASTs'].values[0]
        last2 = last_df[last_df['JD_time'] == jd_time2]['LASTs'].values[0]
        _, offset = find_nearest(last2, last1[0])

        rel_df2 = pd.read_pickle(rel_df_path2)
        rel_df2 = rel_df2[rel_df2.index.get_level_values('time_int') >= offset]

        # the dataset in the next row of the JD-LAST map supplies the time
        # integrations before the offset
        next_row = numpy.where(last_df['JD_time'] == jd_time2)[0][0] + 1
        rel_df_path3 = find_rel_df(last_df.iloc[next_row]['JD_time'], args.pol, \
                                   args.dist, args.rel_dir)
        rel_df3 = pd.read_pickle(rel_df_path3)
        rel_df3 = rel_df3[rel_df3.index.get_level_values('time_int') < offset]

        # combined results dataframes that is now aligned in LAST by row number
        # with rel_df:
        rel_df_c = pd.concat([rel_df2, rel_df3])
        # pairing time_ints from rel_df and rel_df_c that match in LAST
        time_ints2 = numpy.arange(offset, offset + md['Ntimes']) % md['Ntimes']
        iter_dims = list(zip(time_ints, time_ints2[time_ints]))
        iter_dims = [idim+(freq_chan,) for idim in iter_dims for freq_chan in \
                     freq_chans]
        iter_dims = sorted(iter_dims, key=lambda row: row[2]) # iterate across
        # LAST first - should speed up fitting
        a, b, c, d = 2, 2, 0, 1 # for iteration indexing

    if not iter_dims:
        raise ValueError('No frequency channels or time integrations to '\
                         'iterate over - check that the specified --chans '\
                         'and --tints exist in the relative calibration '\
                         'results dataframes')

    skip_cal = False
    # skipping freqs and tints that are already in dataframe
    if csv_exists or pkl_exists:
        if csv_exists:
            idx_arr = pd.read_csv(out_csv, usecols=indices).values
        else:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.reset_index()[indices].values
        # a set of index tuples gives constant-time "already solved" checks
        done = set(map(tuple, idx_arr.tolist()))
        iter_dims = [idim for idim in iter_dims if tuple(idim) not in done]
        if not iter_dims:
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        # removing 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        # number of columns that precede the solution parameters in rel_df
        no_meta = len(slct_keys) - 1
        no_deg_params = 3 # overall amplitude, x phase gradient, y phase gradient
        header = slct_keys[:-1] + list(numpy.arange(no_deg_params)) + indices

        stdout = io.StringIO()
        with redirect_stdout(stdout): # suppress output
            # newline='' is what the csv module expects of its file objects
            with open(out_csv, 'a', newline='') as f: # write / append to csv
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                # every solve starts from the default initial parameters;
                # re-using the previous solution is currently disabled
                initp = None
                for iter_dim in iter_dims:
                    # get relatively calibrated solutions
                    resx1 = rel_df.loc[iter_dim[a], iter_dim[c]][no_meta:-2]\
                        .values.astype(float)
                    resx2 = rel_df_c.loc[iter_dim[b], iter_dim[d]][no_meta:-2]\
                        .values.astype(float)
                    rel_vis1, _ = split_rel_results(resx1, no_unq_bls, coords=args.coords)
                    rel_vis2, _ = split_rel_results(resx2, no_unq_bls, coords=args.coords)

                    res_deg = doDegVisVis(ant_sep, rel_vis1, rel_vis2, \
                                          distribution=args.dist, initp=initp)
                    res_deg = {key: res_deg[key] for key in slct_keys}
                    # expanding out the solution
                    for i, param in enumerate(res_deg['x']):
                        res_deg[i] = param
                    del res_deg['x']
                    # Pair every index column with its value directly, rather
                    # than relying on the number of degenerate parameters
                    # happening to equal the number of index columns.
                    res_deg.update(zip(indices, iter_dim))
                    writer.writerow(res_deg)

        print('Degenerate fitting results saved to csv file {}'.format(out_csv))
        df = pd.read_csv(out_csv)
        # the second index column (e.g. 'freq2') is kept as the first regular
        # column instead of being part of the dataframe index
        df_indices = indices.copy()
        mv_col = df_indices.pop(1)
        df.set_index(df_indices, inplace=True)
        cols = list(df.columns.values)
        cols.remove(mv_col)
        df = df[[mv_col]+cols]
        # we now append the residuals as additional columns
        df = append_residuals_deg(df, rel_df, rel_df_c, md, out_fn=None)
        if pkl_exists and not csv_exists:
            # the csv only holds the new solutions; merge in the pickled ones
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'.format(args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Degenerate fitting results dataframe pickled to {}'.format(out_pkl))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))