def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Relative redundant calibration of visibilities

    Takes a given HERA visibility dataset in uvh5 file format and performs
    relative redundant calibration (up to the overall amplitude, overall
    phase, and phase gradient degenerate parameters) for each frequency
    channel and each time integration in the dataset.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the relative redundant calibration for each set of frequency channel and
    time integration.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to '
                        'calibrate', metavar='JD', type=str)
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None,
                        metavar='C', type=str,
                        help='Frequency channels to calibrate {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None,
                        metavar='T', type=str,
                        help='Time integrations to calibrate {0, 59}')
    parser.add_argument('-f', '--flag_type', required=False, default='first',
                        metavar='F', type=str,
                        help='Flag type e.g. "first", "omni", "abs"')
    parser.add_argument('-d', '--dist', required=True, metavar='D', type=str,
                        help='Fitting distribution for calibration '
                        '{"cauchy", "gaussian"}')
    parser.add_argument('-m', '--method', required=False, default='cartesian',
                        metavar='M', type=str,
                        help='Method to use - {"cartesian", "polar", "RP"}, '
                        'where RP stands for reduced parameters')
    parser.add_argument('-l', '--logamp', required=False, action='store_true',
                        help='Use logamp method to force positive gain '
                        'amplitudes')
    parser.add_argument('-g', '--tilt_reg', required=False,
                        action='store_true',
                        help='Add regularization term to constrain tilt '
                        'shifts to 0')
    parser.add_argument('-a', '--gphase_reg', required=False,
                        action='store_true',
                        help='Add regularization term to constrain the gain '
                        'phase mean')
    parser.add_argument('-i', '--initp_jd', required=False, default=None,
                        metavar='I', type=int,
                        help='JD of day from which to find datasets to reuse '
                        'initial parameters')
    parser.add_argument('-v', '--noise', required=False, action='store_true',
                        help='Use noise from autos in nlogL calculations')
    parser.add_argument('-u', '--out_dir', required=False, default=None,
                        metavar='U', type=str,
                        help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    out_fn = args.out
    default_fn = 'rel_df.{}.{}.{}'.format(args.jd_time, args.pol, args.dist)
    if out_fn is None:
        out_fn = default_fn
    if args.out_dir is not None:
        if not os.path.exists(args.out_dir):
            os.mkdir(args.out_dir)
        out_fn = os.path.join(args.out_dir, out_fn)
        default_fn = os.path.join(args.out_dir, default_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if csv_exists or pkl_exists:
        if args.new_df:
            out_csv = new_fn(out_csv, None, startTime)
            out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
            csv_exists = False
            pkl_exists = False

    zen_fn = find_zen_file(args.jd_time)
    bad_ants = get_bad_ants(zen_fn)

    flag_type = args.flag_type
    if flag_type is not None:
        flag_fn = find_flag_file(args.jd_time, flag_type)
    else:
        flag_fn = None

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    hd = HERAData(zen_fn)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(hd.Nfreqs - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(hd.Ntimes - 1)
    print('Running relative redundant calibration on visibility dataset {} '
          'for polarization {}, frequency channel(s) {} and time '
          'integration(s) {} with {} assumed noise distribution\n'
          .format(os.path.basename(zen_fn), args.pol, pchans, ptints,
                  args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(hd.Nfreqs)
    if time_ints is None:
        time_ints = numpy.arange(hd.Ntimes)

    indices = ['freq', 'time_int']

    no_tints = len(time_ints)
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    skip_cal = False
    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        cmap_f = dict(map(reversed, enumerate(freq_chans)))
        cmap_t = dict(map(reversed, enumerate(time_ints)))
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.index.values
        done = [(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if (f in freq_chans and t in time_ints)]
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not any(iter_dims):
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        grp = group_data(zen_fn, args.pol, freq_chans, time_ints,
                         bad_ants, flag_path=flag_fn, noise=args.noise)
        if not args.noise:
            _, RedG, cData = grp
            noisec = None
        else:
            _, RedG, cData, cNData = grp

        flags = cData.mask
        cData = cData.data

        # to get fields for the csv header
        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants + no_unq_bls) * 2
        if args.tilt_reg:
            ant_pos_arr = flt_ant_pos(hd.antpos, ants)
        else:
            ant_pos_arr = None

        # discarding 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        # remove flagged channels from iter_dims
        if True in flags:
            flg_chans = numpy.where(flags.all(axis=(1, 2)))[0]  # indices
            print('Flagged channels for visibility dataset {} are: {}\n'
                  .format(os.path.basename(zen_fn), freq_chans[flg_chans]))
            iter_dims = [idim for idim in iter_dims
                         if idim[0] not in flg_chans]
            if not iter_dims:  # check if slices to solve are empty
                print('All specified channels are flagged. Exiting.')
                sys.exit()

        if args.initp_jd is not None:
            jd_time2 = match_lst(args.jd_time, args.initp_jd)
            if len(str(jd_time2)) < 13:
                # add a trailing 0 that is omitted in float
                jd_time2 = str(jd_time2) + '0'
            rel_df_path1 = find_rel_df(jd_time2, args.pol, args.dist)
            if isinstance(jd_time2, str):
                jd_time2 = float(jd_time2)

            last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
            last1 = last_df[last_df['JD_time'] == float(args.jd_time)]['LASTs'].values[0]
            last2 = last_df[last_df['JD_time'] == jd_time2]['LASTs'].values[0]
            _, offset = find_nearest(last2, last1[0])

            rel_df1 = pd.read_pickle(rel_df_path1)
            rel_df1 = rel_df1[rel_df1.index.get_level_values('time_int') >= offset]

            next_row = numpy.where(last_df['JD_time'] == jd_time2)[0][0] + 1
            rel_df_path2 = find_rel_df(last_df.iloc[next_row]['JD_time'],
                                       args.pol, args.dist)
            rel_df2 = pd.read_pickle(rel_df_path2)
            rel_df2 = rel_df2[rel_df2.index.get_level_values('time_int') < offset]

            rel_df_c = pd.concat([rel_df1, rel_df2])

            # filter by specified channels and time integrations
            time_ints_offset = (time_ints + offset) % hd.Ntimes
            freq_flt = numpy.in1d(rel_df_c.index.get_level_values('freq'),
                                  freq_chans)
            tint_flt = numpy.in1d(rel_df_c.index.get_level_values('time_int'),
                                  time_ints_offset)
            rel_df_c = rel_df_c[freq_flt & tint_flt]

            time_ints2 = numpy.tile(rel_df_c.index.get_level_values('time_int')
                                    .unique().values, freq_chans.size)
            iter_dims = [idim + (tint,) for idim, tint in
                         zip(iter_dims, time_ints2)]

            phase_reg_initp = True
        else:
            phase_reg_initp = False

        def cal(credg, distribution, coords, no_unq_bls, no_ants, logamp,
                tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCal: unconstrained
            minimizer using cartesian coordinates - this is the fastest solver

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood {'gaussian',
            'cauchy'}
            :type distribution: str
            :param coords: Coordinate system in which gain and visibility
            parameters have been set up
            :type coords: str {"cartesian", "polar"}
            :param no_unq_bls: Number of unique baselines (equivalently the
            number of redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters is
            taken, such that only positive solutions can be returned. Only if
            coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts
            to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain
            phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates
            for the antennas in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and
            given time, reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and
            gains
            :type initp: ndarray, None

            :return: Optimization result for the solved antenna gains and true
            sky visibilities
            :rtype: Scipy optimization result object
            """
            res_rel, initp_new = doRelCal(credg, obsvis, no_unq_bls, no_ants,
                                          coords=coords,
                                          distribution=distribution,
                                          noise=noise, norm_gains=True,
                                          logamp=logamp, tilt_reg=tilt_reg,
                                          gphase_reg=gphase_reg,
                                          ant_pos_arr=ant_pos_arr,
                                          initp=initp, return_initp=True,
                                          phase_reg_initp=phase_reg_initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_new
            return res_rel, initp

        def cal_RP(credg, distribution, no_unq_bls, no_ants, logamp,
                   tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCalRP: constrained
            minimizer (by reducing the number of parameters) using polar
            coordinates

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood {'gaussian',
            'cauchy'}
            :type distribution: str
            :param no_unq_bls: Number of unique baselines (equivalently the
            number of redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters is
            taken, such that only positive solutions can be returned. Only if
            coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts
            to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain
            phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates
            for the antennas in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and
            given time, reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and
            gains
            :type initp: ndarray, None

            :return: Optimization result for the solved antenna gains and true
            sky visibilities
            :rtype: Scipy optimization result object
            """
            res_rel, initp_ = doRelCalRP(credg, obsvis, no_unq_bls, no_ants,
                                         distribution=distribution,
                                         noise=noise, constr_phase=True,
                                         amp_constr='prod', bounded=True,
                                         logamp=logamp, tilt_reg=tilt_reg,
                                         gphase_reg=gphase_reg,
                                         ant_pos_arr=ant_pos_arr, initp=initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_
            return res_rel, initp

        if args.method.upper() == 'RP':
            RelCal = functools.partial(cal_RP, cRedG, args.dist, no_unq_bls,
                                       no_ants, args.logamp, args.tilt_reg,
                                       args.gphase_reg, ant_pos_arr)
            coords = 'polar'
        else:
            RelCal = functools.partial(cal, cRedG, args.dist, args.method,
                                       no_unq_bls, no_ants, args.logamp,
                                       args.tilt_reg, args.gphase_reg,
                                       ant_pos_arr)
            coords = args.method

        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            with open(out_csv, 'a') as f:  # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                for i, iter_dim in enumerate(iter_dims):
                    if args.initp_jd is not None:
                        initp = rel_df_c.loc[(freq_chans[iter_dim[0]], iter_dim[2])]\
                                [len(slct_keys[:-1]):-2].values.astype(float)
                    if args.noise:
                        noisec = cNData[iter_dim[:2]]
                    res_rel, initp = RelCal(cData[iter_dim[:2]], noisec, initp)
                    # expanding out the solution
                    for j, param in enumerate(res_rel['x']):
                        res_rel[j] = param
                    # reset initp after each frequency slice
                    if not (i + 1) % no_tints and args.initp_jd is None:
                        initp = None
                    del res_rel['x']
                    res_rel.update({indices[0]: freq_chans[iter_dim[0]],
                                    indices[1]: time_ints[iter_dim[1]]})
                    writer.writerow(res_rel)

        print('Relative calibration results saved to csv file {}'
              .format(out_csv))
        df = pd.read_csv(out_csv)
        if csv_exists:
            freqs = df['freq'].unique()
            tints = df['time_int'].unique()
            if cData.shape[0] != freqs.size or cData.shape[1] != tints.size:
                _, _, cData = group_data(zen_fn, args.pol, freqs, tints,
                                         bad_ants, flag_path=flag_fn)
                cData = cData.data
        df.set_index(indices, inplace=True)
        # we now append the residuals as additional columns
        df = append_residuals_rel(df, cData, cRedG, coords, out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        df.to_pickle(out_pkl)
        print('Relative calibration results dataframe pickled to {}'
              .format(out_pkl))

        # creating metadata file
        out_md = default_fn.rsplit('.', 1)[0] + '.md.pkl'
        if not os.path.exists(out_md):
            md = {'no_ants': no_ants, 'no_unq_bls': no_unq_bls, 'redg': RedG,
                  'antpos': hd.antpos, 'last': hd.lsts, 'Nfreqs': hd.Nfreqs,
                  'Ntimes': hd.Ntimes}
            with open(out_md, 'wb') as f:
                pickle.dump(md, f, protocol=pickle.HIGHEST_PROTOCOL)
            print('Relative calibration metadata pickled to {}\n'
                  .format(out_md))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
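
# A minimal usage sketch for this script (the script name rel_cal.py, the JD
# time and the channel/tint ranges are hypothetical; mod_str_arg is assumed to
# accept '~'-separated ranges, as suggested by the '0~{}' prints above):
#
#   python rel_cal.py 2458098.43869 -p ee -d cauchy -c 600~700 -t 0~10
#
# This would relatively calibrate channels 600-700 and time integrations 0-10
# of the matching uvh5 dataset, and pickle the results dataframe to
# rel_df.2458098.43869.ee.cauchy.pkl in the working directory.
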
def align_df(df_type, JD_time, JD_comp, dir_path, ndist, pol,
             JD_anchor=2458099):
    """Build the dataframe on a separate JD that is aligned in LAST with that
    on jd_time (e.g. rel_df or deg_df) - due to the offset in LAST, this
    requires the concatenation of two separate dataframes.

    :param df_type: What dataframes are being joined {"rel", "deg"}
    :type df_type: str
    :param JD_time: Fractional Julian date of dataframe we wish to be aligned
    to
    :type JD_time: float, str
    :param JD_comp: JD day to align
    :type JD_comp: str
    :param dir_path: Directory in which dataframes are located
    :type dir_path: str
    :param ndist: Noise distribution for calibration {"cauchy", "gaussian"}
    :type ndist: str
    :param pol: Polarization of data
    :type pol: str
    :param JD_anchor: JD of anchor day used in degenerate comparison; only if
    df_type = "deg"
    :type JD_anchor: int

    :return: Dataframe from JD_comp, aligned in LAST with the dataframe on
    JD_time
    :rtype: DataFrame
    """
    # find dataset from specified JD that contains visibilities at the same
    # LAST
    JD_timea = match_lst(JD_time, JD_comp)
    JD_timea = check_jdt(JD_timea)

    # aligning datasets in LAST
    jd_lst_map_fn = os.path.join(os.path.dirname(__file__),
                                 'jd_lst_map_idr2.pkl')
    last_df = pd.read_pickle(jd_lst_map_fn)
    last1 = last_df[last_df['JD_time'] == float(JD_time)]['LASTs'].values[0]
    last2 = last_df[last_df['JD_time'] == float(JD_timea)]['LASTs'].values[0]
    _, offset = find_nearest(last2, last1[0])

    next_row = numpy.where(last_df['JD_time'] == float(JD_timea))[0][0] + 1
    JD_timeb = last_df.iloc[next_row]['JD_time']
    JD_timeb = check_jdt(JD_timeb)

    if df_type == 'rel':
        tidx = 'time_int'
        indices = ['freq', tidx]
        df_patha = find_rel_df(JD_timea, pol, ndist, dir_path)
        df_pathb = find_rel_df(JD_timeb, pol, ndist, dir_path)
    if df_type == 'deg':
        tidx = 'time_int1'
        indices = ['freq', tidx]
        df_patha = find_deg_df(JD_timea, pol, 'jd.{}'.format(JD_anchor),
                               ndist, dir_path)
        df_pathb = find_deg_df(JD_timeb, pol, 'jd.{}'.format(JD_anchor),
                               ndist, dir_path)

    dfb = pd.read_pickle(df_pathb)
    dfa = pd.read_pickle(df_patha)

    Nfreqs = dfa.index.get_level_values('freq').unique().size
    Ntints = dfa.index.get_level_values(tidx).unique().size

    dfa = dfa[dfa.index.get_level_values(tidx) >= offset]
    dfa.sort_index(level=indices, inplace=True)
    # shifting tints to align with those from JD_time
    dfa.reset_index(inplace=True)
    dfa[tidx] = numpy.tile(numpy.arange(Ntints - offset), Nfreqs)
    dfa.set_index(indices, inplace=True)

    dfb = dfb[dfb.index.get_level_values(tidx) < offset]
    dfb.sort_index(level=indices, inplace=True)
    # shifting tints to align with those from JD_time
    dfb.reset_index(inplace=True)
    dfb[tidx] = numpy.tile(numpy.arange(Ntints - offset, Ntints), Nfreqs)
    dfb.set_index(indices, inplace=True)

    # combined results dataframe that is now aligned in LAST by row number
    # with the dataframe labelled by JD_time
    df_c = pd.concat([dfa, dfb])
    df_c.sort_index(inplace=True)
    return df_c
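
# Example call for align_df (a sketch; the JD values and directory are
# hypothetical and the corresponding results pickles must already exist on
# disk):
#
#   rel_df_c = align_df('rel', '2458098.43869', '2458099', './rel_dfs',
#                       'cauchy', 'ee')
#
# The returned dataframe carries the same (freq, time_int) row labels as the
# dataframe on JD_time, with each row holding JD_comp's results at the
# nearest-matching LAST.
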
def XDgroup_data(JD_time, JDs, pol, chans=None, tints=None, bad_ants=True,
                 use_flags='first', noise=False, use_cal=None, verbose=False):
    """Returns redundant baseline grouping and reformatted dataset, with
    external flags applied, if specified

    :param JD_time: Julian time of 1st dataset, which sets times for others
    :type JD_time: str
    :param JDs: Julian days of data
    :type JDs: list, ndarray
    :param pol: Polarization of data
    :type pol: str
    :param chans: Frequency channel(s) {0, 1023} (None to choose all)
    :type chans: array-like, int, or None
    :param tints: Time integrations {0, 59} (None to choose all)
    :type tints: array-like, int, or None
    :param bad_ants: Flag known bad antennas, optional
    :type bad_ants: bool
    :param use_flags: Use flags to mask data
    :type use_flags: str
    :param noise: Also calculate noise from autocorrelations
    :type noise: bool
    :param use_cal: calfits file extension to use to calibrate data
    :type use_cal: str, None
    :param verbose: Print data gathering steps for each dataset
    :type verbose: bool

    :return hd: HERAData class
    :rtype hd: HERAData class
    :return redg: Grouped baselines, as returned by groupBls
    :rtype redg: ndarray
    :return cdata: Grouped visibilities with flags in numpy MaskedArray
    format, with format consistent with redg and dimensions (JDs, freq chans,
    time integrations, baselines)
    :rtype cdata: MaskedArray
    :return cndata: Grouped noise, with the same dimensions as cdata; only
    returned if noise is True
    :rtype cndata: ndarray
    """
    if isinstance(chans, int):
        chans = np.asarray([chans])
    if isinstance(tints, int):
        tints = np.asarray([tints])

    zen_fn = find_zen_file(JD_time)
    flags_fn = find_flag_file(JD_time, use_flags)

    hd = HERAData(zen_fn)
    if tints is None:
        tints = np.arange(hd.Ntimes)

    if bad_ants:
        bad_ants = union_bad_ants(JDs)
    else:
        bad_ants = None

    if use_cal is None:
        cal_path = None
    else:
        cal_path = find_flag_file(JD_time, use_cal)

    if not verbose:
        grp_data = suppressOutput(group_data)
    else:
        grp_data = group_data

    grp = grp_data(zen_fn, pol, chans=chans, tints=tints, bad_ants=bad_ants,
                   flag_path=flags_fn, noise=noise, cal_path=cal_path)
    _, redg, cMData = grp[:3]

    cMData = cMData[np.newaxis, :]
    if noise:
        cNoise = grp[3]
        cNoise = cNoise[np.newaxis, :]

    JD_day = int(float(JD_time))
    if JD_day in JDs:
        JDs = list(JDs)
        JDs.remove(JD_day)

    for jd_i in JDs:
        JD_time_ia = match_lst(JD_time, jd_i)
        # aligning datasets in LAST
        last_df = pd.read_pickle(os.path.join(os.path.dirname(__file__),
                                              'jd_lst_map_idr2.pkl'))
        last1 = last_df[last_df['JD_time'] == float(JD_time)]['LASTs'].values[0]
        last2 = last_df[last_df['JD_time'] == float(JD_time_ia)]['LASTs'].values[0]
        _, offset = find_nearest(last2, last1[0])

        # shifted time integrations, wrapped around the dataset boundary
        tints_i = (tints + offset) % hd.Ntimes
        # all shifted tints fall in the following dataset
        scnd_dataset = all(tints + offset > hd.Ntimes - 1)
        # all shifted tints fall within a single dataset
        single_dataset = all(tints + offset < hd.Ntimes) or scnd_dataset

        if not single_dataset:
            tints_ia, tints_ib = np.split(tints_i, np.where(tints_i == 0)[0])
        else:
            tints_ia = tints_i

        if scnd_dataset:
            next_row = np.where(last_df['JD_time'] ==
                                float(JD_time_ia))[0][0] + 1
            JD_time_ib = last_df.iloc[next_row]['JD_time']
            JD_time_ia = JD_time_ib

        JD_time_ia = check_jdt(JD_time_ia)
        zen_fn_ia = find_zen_file(JD_time_ia)
        flags_fn_ia = find_flag_file(JD_time_ia, use_flags)
        if use_cal is not None:
            cal_path_ia = find_flag_file(JD_time_ia, use_cal)
        else:
            cal_path_ia = None
        grp_a = grp_data(zen_fn_ia, pol, chans=chans, tints=tints_ia,
                         bad_ants=bad_ants, flag_path=flags_fn_ia,
                         noise=noise, cal_path=cal_path_ia)
        cMData_ia = grp_a[2]

        if not single_dataset:
            next_row = np.where(last_df['JD_time'] ==
                                float(JD_time_ia))[0][0] + 1
            JD_time_ib = last_df.iloc[next_row]['JD_time']
            JD_time_ib = check_jdt(JD_time_ib)
            zen_fn_ib = find_zen_file(JD_time_ib)
            flags_fn_ib = find_flag_file(JD_time_ib, use_flags)
            if use_cal is not None:
                cal_path_ib = find_flag_file(JD_time_ib, use_cal)
            else:
                cal_path_ib = None
            grp_b = grp_data(zen_fn_ib, pol, chans=chans, tints=tints_ib,
                             bad_ants=bad_ants, flag_path=flags_fn_ib,
                             noise=noise, cal_path=cal_path_ib)
            cMData_ib = grp_b[2]

            cMData_i = np.ma.concatenate((cMData_ia, cMData_ib), axis=1)
        else:
            cMData_i = cMData_ia

        cMData_i = cMData_i[np.newaxis, :]
        cMData = np.ma.concatenate((cMData, cMData_i), axis=0)
        if noise:
            cNoise_ia = grp_a[3]
            if not single_dataset:
                cNoise_ib = grp_b[3]
                cNoise_i = np.concatenate((cNoise_ia, cNoise_ib), axis=1)
            else:
                cNoise_i = cNoise_ia
            cNoise_i = cNoise_i[np.newaxis, :]
            cNoise = np.concatenate((cNoise, cNoise_i), axis=0)

    if noise:
        return hd, redg, cMData, cNoise
    else:
        return hd, redg, cMData
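
# Sketch of gathering LAST-aligned data across several days (hypothetical JD
# values; assumes the zen, flag and jd_lst_map_idr2.pkl files for each day can
# be found on disk by find_zen_file/find_flag_file):
#
#   hd, redg, cdata, cndata = XDgroup_data('2458098.43869',
#                                          [2458098, 2458099], 'ee',
#                                          chans=np.arange(600, 701),
#                                          bad_ants=True, use_flags='first',
#                                          noise=True)
#
# cdata is then a masked array with dimensions (days, freq chans, time
# integrations, baselines), with each day's rows aligned in LAST to those of
# the dataset on JD_time.
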
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Degenerate fitting of relatively calibrated visibility solutions

    Takes the relatively calibrated visibility solutions of HERA data, and
    fits degenerate parameters (overall amplitude, overall phase, and phase
    gradient) to two adjacent datasets in either LAST, frequency, or JD.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the degenerate fitting.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to '
                        'analyze', metavar='JD', type=str)
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-x', '--deg_dim', required=True, metavar='X',
                        type=str,
                        help='Which dimension to compare relatively '
                        'calibrated visibility solutions {"tint", "freq", '
                        '"jd"}')
    parser.add_argument('-m', '--coords', required=False, default='cartesian',
                        metavar='M', type=str,
                        help='Coordinates used for rel cal results - '
                        '{"cartesian", "polar"}')
    parser.add_argument('-c', '--chans', required=False, default=None,
                        metavar='C', type=str,
                        help='Frequency channels to fit {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None,
                        metavar='T', type=str,
                        help='Time integrations to fit {0, 59}')
    parser.add_argument('-d', '--dist', required=True, metavar='D', type=str,
                        help='Fitting distribution for calibration '
                        '{"cauchy", "gaussian"}')
    parser.add_argument('-j', '--tgt_jd', required=False, default=None,
                        metavar='J', type=int,
                        help='JD day for fitting across JDs - only if '
                        'deg_dim = "jd". Defaults to the consecutive JD day')
    parser.add_argument('-r', '--rel_dir', required=False, default=None,
                        metavar='R', type=str,
                        help='Directory in which relative calibration '
                        'results dataframes are located')
    parser.add_argument('-u', '--out_dir', required=False, default=None,
                        metavar='U', type=str,
                        help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new csv file')
    parser.add_argument('-k', '--compression', required=False, default=None,
                        metavar='K', type=str,
                        help='Compression to use when pickling results '
                        'dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    pjd = ''
    if args.deg_dim == 'jd':
        tgt_jd = args.tgt_jd
        if tgt_jd is None:
            # choose the consecutive JD as default
            tgt_jd = int(float(args.jd_time)) + 1
        pjd = '.' + str(tgt_jd)

    out_fn = args.out
    if out_fn is None:
        out_fn = 'deg_df.{}.{}.{}{}.{}'.format(args.jd_time, args.pol,
                                               args.deg_dim, pjd, args.dist)
    if args.out_dir is not None:
        if not os.path.exists(args.out_dir):
            os.mkdir(args.out_dir)
        out_fn = os.path.join(args.out_dir, out_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if csv_exists or pkl_exists:
        if args.new_df:
            out_csv = new_fn(out_csv, None, startTime)
            out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
            csv_exists = False
            pkl_exists = False

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    rel_df_path = find_rel_df(args.jd_time, args.pol, args.dist, args.rel_dir)
    rel_df = pd.read_pickle(rel_df_path)

    # retrieving visibility metadata
    md_fn = 'rel_df.{}.{}.md.pkl'.format(args.jd_time, args.pol)
    if args.rel_dir is not None:
        md_fn = os.path.join(args.rel_dir, md_fn)
    with open(md_fn, 'rb') as f:
        md = pickle.load(f)
    ant_pos = md['antpos']
    no_unq_bls = md['no_unq_bls']
    RedG = md['redg']
    ant_sep = red_ant_sep(RedG, ant_pos)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(md['Nfreqs'] - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(md['Ntimes'] - 1)
    pdict = {'freq': 'frequency channels', 'tint': 'time integrations',
             'jd': 'Julian days'}
    print('Running degenerate translation on adjacent {} for visibility '
          'dataset {} for frequency channel(s) {} and time integration(s) {} '
          'with {} assumed noise distribution\n'
          .format(pdict[args.deg_dim],
                  os.path.basename(find_zen_file(args.jd_time)), pchans,
                  ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(md['Nfreqs'])
    if time_ints is None:
        time_ints = numpy.arange(md['Ntimes'])

    # filter by specified channels and time integrations
    freq_flt = numpy.in1d(rel_df.index.get_level_values('freq'), freq_chans)
    tint_flt = numpy.in1d(rel_df.index.get_level_values('time_int'),
                          time_ints)
    rel_df = rel_df[freq_flt & tint_flt]

    # only getting frequencies and time integrations that exist in the df
    freq_chans = rel_df.index.get_level_values('freq').unique().values
    time_ints = rel_df.index.get_level_values('time_int').unique().values

    if args.deg_dim == 'freq':
        indices = ['freq1', 'freq2', 'time_int']
        # getting adjacent frequency channel pairs
        iter_dims = [idim for idim in zip(freq_chans, freq_chans[1:])
                     if idim[1] - idim[0] == 1]
        # adding time integrations
        iter_dims = [idim + (time_int,) for idim in iter_dims
                     for time_int in time_ints]
        a, b, c, d = 0, 1, 2, 2  # for iteration indexing

    if args.deg_dim == 'tint':
        indices = ['time_int1', 'time_int2', 'freq']
        # getting adjacent LAST (time integration) pairs
        iter_dims = [idim for idim in zip(time_ints, time_ints[1:])
                     if idim[1] - idim[0] == 1]
        # adding frequency channels
        iter_dims = [idim + (freq_chan,) for idim in iter_dims
                     for freq_chan in freq_chans]
        a, b, c, d = 2, 2, 0, 1  # for iteration indexing

    if args.deg_dim == 'jd':
        indices = ['time_int1', 'time_int2', 'freq']
        # find dataset from specified JD that contains visibilities at the
        # same LAST
        jd_time2 = match_lst(args.jd_time, tgt_jd)
        if len(str(jd_time2)) < 13:
            # add a trailing 0 that is omitted in float
            jd_time2 = str(jd_time2) + '0'
        rel_df_path2 = find_rel_df(jd_time2, args.pol, args.dist,
                                   args.rel_dir)
        if isinstance(jd_time2, str):
            jd_time2 = float(jd_time2)

        # aligning datasets in LAST
        last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
        last1 = last_df[last_df['JD_time'] == float(args.jd_time)]['LASTs'].values[0]
        last2 = last_df[last_df['JD_time'] == jd_time2]['LASTs'].values[0]
        _, offset = find_nearest(last2, last1[0])

        rel_df2 = pd.read_pickle(rel_df_path2)
        rel_df2 = rel_df2[rel_df2.index.get_level_values('time_int') >= offset]

        next_row = numpy.where(last_df['JD_time'] == jd_time2)[0][0] + 1
        rel_df_path3 = find_rel_df(last_df.iloc[next_row]['JD_time'],
                                   args.pol, args.dist, args.rel_dir)
        rel_df3 = pd.read_pickle(rel_df_path3)
        rel_df3 = rel_df3[rel_df3.index.get_level_values('time_int') < offset]

        # combined results dataframe that is now aligned in LAST by row
        # number with rel_df
        rel_df_c = pd.concat([rel_df2, rel_df3])

        # pairing time_ints from rel_df and rel_df_c that match in LAST
        # time_ints2 = rel_df_c.index.get_level_values('time_int').unique().values
        time_ints2 = numpy.arange(offset, offset + md['Ntimes']) % md['Ntimes']
        iter_dims = list(zip(time_ints, time_ints2[time_ints]))
        iter_dims = [idim + (freq_chan,) for idim in iter_dims
                     for freq_chan in freq_chans]
        # iterate across LAST first - should speed up fitting
        iter_dims = sorted(iter_dims, key=lambda row: row[2])
        a, b, c, d = 2, 2, 0, 1  # for iteration indexing
    else:
        rel_df_c = rel_df

    if not iter_dims:
        raise ValueError('No frequency channels or time integrations to '
                         'iterate over - check that the specified --chans '
                         'and --tints exist in the relative calibration '
                         'results dataframes')

    skip_cal = False
    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.reset_index()[indices].values
        iter_dims = [idim for idim in iter_dims if not
                     numpy.equal(idx_arr, numpy.asarray(idim)).all(1).any()]
        if not any(iter_dims):
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        # removing 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        # overall amplitude, x phase gradient, y phase gradient
        no_deg_params = 3
        header = slct_keys[:-1] + list(numpy.arange(no_deg_params)) + indices

        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            with open(out_csv, 'a') as f:  # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                for iter_dim in iter_dims:
                    # get relatively calibrated solutions
                    resx1 = rel_df.loc[iter_dim[a], iter_dim[c]]\
                            [len(slct_keys) - 1:-2].values.astype(float)
                    resx2 = rel_df_c.loc[iter_dim[b], iter_dim[d]]\
                            [len(slct_keys) - 1:-2].values.astype(float)
                    rel_vis1, _ = split_rel_results(resx1, no_unq_bls,
                                                    coords=args.coords)
                    rel_vis2, _ = split_rel_results(resx2, no_unq_bls,
                                                    coords=args.coords)

                    res_deg = doDegVisVis(ant_sep, rel_vis1, rel_vis2,
                                          distribution=args.dist,
                                          initp=initp)
                    res_deg = {key: res_deg[key] for key in slct_keys}
                    # expanding out the solution
                    for i, param in enumerate(res_deg['x']):
                        res_deg[i] = param
                    # to use solution for next solve in iteration
                    # if res_deg['success']:
                    #     initp = res_deg['x']
                    del res_deg['x']
                    res_deg.update({indices[i]: iter_dim[i]
                                    for i in range(no_deg_params)})
                    writer.writerow(res_deg)

        print('Degenerate fitting results saved to csv file {}'
              .format(out_csv))
        df = pd.read_csv(out_csv)
        df_indices = indices.copy()
        mv_col = df_indices.pop(1)
        df.set_index(df_indices, inplace=True)
        cols = list(df.columns.values)
        cols.remove(mv_col)
        df = df[[mv_col] + cols]
        # we now append the residuals as additional columns
        df = append_residuals_deg(df, rel_df, rel_df_c, md, out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)

        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'
                  .format(args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Degenerate fitting results dataframe pickled to {}'
              .format(out_pkl))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
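
# A minimal usage sketch for this script (the script name deg_cal.py and the
# JD values are hypothetical; the rel_df and metadata pickles for both days
# must already exist in --rel_dir):
#
#   python deg_cal.py 2458098.43869 -p ee -x jd -j 2458099 -d cauchy \
#       -r ./rel_dfs -u ./deg_dfs
#
# This would fit the degenerate parameters between the relative calibration
# solutions on JD 2458098 and those at matching LASTs on JD 2458099, and
# pickle the results dataframe to ./deg_dfs.
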