def main():
    """Command-line entry point for relative redundant calibration.

    Parses the command-line arguments, locates the visibility dataset for
    the requested JD, and solves the relative redundant calibration for every
    requested (frequency channel, time integration) slice. Each solution is
    appended as a row of a csv file as soon as it is found, so that an
    interrupted run can be resumed: slices already present in an existing csv
    (or, failing that, an existing pickled dataframe) are skipped unless
    ``--new_df`` is given. Once all slices are solved, the csv is read back,
    residuals are appended, and the result is pickled alongside a metadata
    file.

    The solver output printed to stdout during the solves is captured and
    discarded.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Relative redundant calibration of visibilities

    Takes a given HERA visibility dataset in uvh5 file format and performs
    relative redundant calibration (up to the overall amplitude, overall
    phase, and phase gradient degenerate parameters) for each frequency channel
    and each time integration in the dataset.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the relative redundant calibration for each set of frequency channel and
    time integration.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to '
                        'calibrate', metavar='JD', type=str)
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None,
                        metavar='C', type=str,
                        help='Frequency channels to calibrate {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None,
                        metavar='T', type=str,
                        help='Time integrations to calibrate {0, 59}')
    parser.add_argument('-f', '--flag_type', required=False, default='first',
                        metavar='F', type=str,
                        help='Flag type e.g. "first", "omni", "abs"')
    parser.add_argument('-d', '--dist', required=True, metavar='D', type=str,
                        help='Fitting distribution for calibration '
                        '{"cauchy", "gaussian"}')
    parser.add_argument('-m', '--method', required=False, default='cartesian',
                        metavar='M', type=str,
                        help='Method to use - {"cartesian", "polar", "RP"}, '
                        'where RP stands for reduced parameters')
    parser.add_argument('-l', '--logamp', required=False, action='store_true',
                        help='Use logamp method to force positive gain amplitudes')
    parser.add_argument('-g', '--tilt_reg', required=False, action='store_true',
                        help='Add regularization term to constrain tilt shifts to 0')
    parser.add_argument('-a', '--gphase_reg', required=False, action='store_true',
                        help='Add regularization term to constrain the gain phase mean')
    parser.add_argument('-i', '--initp_jd', required=False, default=None,
                        metavar='I', type=int,
                        help='JD of to find datasets to reuse initial parameters')
    parser.add_argument('-v', '--noise', required=False, action='store_true',
                        help='Use noise from autos in nlogL calculations')
    parser.add_argument('-u', '--out_dir', required=False, default=None,
                        metavar='U', type=str,
                        help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    # default_fn is kept separately from out_fn because the metadata file is
    # always named after the default, whatever --out is set to
    out_fn = args.out
    default_fn = 'rel_df.{}.{}.{}'.format(args.jd_time, args.pol, args.dist)
    if out_fn is None:
        out_fn = default_fn
    if args.out_dir is not None:
        # makedirs also creates missing parent directories and does not fail
        # if the directory already exists
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)
        default_fn = os.path.join(args.out_dir, default_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if (csv_exists or pkl_exists) and args.new_df:
        # write to fresh files instead of resuming from the existing ones
        out_csv = new_fn(out_csv, None, startTime)
        out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
        csv_exists = False
        pkl_exists = False

    zen_fn = find_zen_file(args.jd_time)
    bad_ants = get_bad_ants(zen_fn)
    flag_type = args.flag_type
    if flag_type is not None:
        flag_fn = find_flag_file(args.jd_time, flag_type)
    else:
        flag_fn = None

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    hd = HERAData(zen_fn)

    # strings describing the selection, for the log message only
    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(hd.Nfreqs - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(hd.Ntimes - 1)
    print('Running relative redundant calibration on visibility dataset {} for '
          'polarization {}, frequency channel(s) {} and time integration(s) {} '
          'with {} assumed noise distribution\n'.
          format(os.path.basename(zen_fn), args.pol, pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(hd.Nfreqs)
    if time_ints is None:
        time_ints = numpy.arange(hd.Ntimes)

    indices = ['freq', 'time_int']

    # iter_dims holds (channel index, time index) positions into freq_chans
    # and time_ints, not the channel / integration numbers themselves
    no_tints = len(time_ints)
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    skip_cal = False

    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        # maps from channel / integration number to its position
        cmap_f = {chan: idx for idx, chan in enumerate(freq_chans)}
        cmap_t = {tint: idx for idx, tint in enumerate(time_ints)}
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            # NOTE: pickles are assumed to have been written by this pipeline
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.index.values
        done = {(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if f in cmap_f and t in cmap_t}
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not iter_dims:
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        grp = group_data(zen_fn, args.pol, freq_chans, time_ints,
                         bad_ants, flag_path=flag_fn, noise=args.noise)
        if not args.noise:
            _, RedG, cData = grp
            noisec = None
        else:
            _, RedG, cData, cNData = grp

        flags = cData.mask
        cData = cData.data

        # to get fields for the csv header
        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants + no_unq_bls) * 2
        if args.tilt_reg:
            ant_pos_arr = flt_ant_pos(hd.antpos, ants)
        else:
            ant_pos_arr = None

        # discarding 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        # remove flagged channels from iter_dims
        # numpy.any also copes with a scalar (no-mask) flags value
        if numpy.any(flags):
            flg_chans = numpy.where(flags.all(axis=(1, 2)))[0]  # indices
            print('Flagged channels for visibility dataset {} are: {}\n'.
                  format(os.path.basename(zen_fn), freq_chans[flg_chans]))
            iter_dims = [idim for idim in iter_dims
                         if idim[0] not in flg_chans]
            if not iter_dims:  # check if slices to solve are empty
                print('All specified channels are flagged. Exiting.')
                sys.exit()

        if args.initp_jd is not None:
            # reuse solutions from another JD as initial parameters
            jd_time2 = match_lst(args.jd_time, args.initp_jd)
            if len(str(jd_time2)) < 13:
                # add a trailing 0 that is omitted in float
                jd_time2 = str(jd_time2) + '0'
            rel_df_path1 = find_rel_df(jd_time2, args.pol, args.dist)
            if isinstance(jd_time2, str):
                jd_time2 = float(jd_time2)

            last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
            last1 = last_df[last_df['JD_time'] == float(args.jd_time)]\
                ['LASTs'].values[0]
            last2 = last_df[last_df['JD_time'] == jd_time2]['LASTs'].values[0]
            _, offset = find_nearest(last2, last1[0])

            # rows at or after the offset come from the matched dataset, the
            # rest from the dataset in the following row of the JD-LAST map
            rel_df1 = pd.read_pickle(rel_df_path1)
            rel_df1 = rel_df1[
                rel_df1.index.get_level_values('time_int') >= offset]

            next_row = numpy.where(last_df['JD_time'] == jd_time2)[0][0] + 1
            rel_df_path2 = find_rel_df(last_df.iloc[next_row]['JD_time'],
                                       args.pol, args.dist)
            rel_df2 = pd.read_pickle(rel_df_path2)
            rel_df2 = rel_df2[
                rel_df2.index.get_level_values('time_int') < offset]

            rel_df_c = pd.concat([rel_df1, rel_df2])

            # filter by specified channels and time integrations
            time_ints_offset = (time_ints + offset) % hd.Ntimes
            freq_flt = numpy.in1d(rel_df_c.index.get_level_values('freq'),
                                  freq_chans)
            tint_flt = numpy.in1d(rel_df_c.index.get_level_values('time_int'),
                                  time_ints_offset)
            rel_df_c = rel_df_c[freq_flt & tint_flt]

            # extend each iter_dim with the time integration to look up in
            # rel_df_c for its initial parameters
            time_ints2 = numpy.tile(
                rel_df_c.index.get_level_values('time_int').unique().values,
                freq_chans.size)
            iter_dims = [idim + (tint, )
                         for idim, tint in zip(iter_dims, time_ints2)]

            phase_reg_initp = True
        else:
            phase_reg_initp = False

        def cal(credg, distribution, coords, no_unq_bls, no_ants, logamp,
                tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCal: unconstrained
            minimizer using cartesian coordinates - this is the fastest solver

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood {'gaussian', 'cauchy'}
            :type distribution: str
            :param coords: Coordinate system in which gain and visibility
            parameters have been set up
            :type coords: str {"cartesian", "polar"}
            :param no_unq_bls: Number of unique baselines (equivalently the
            number of redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters is
            taken, such that only positive solutions can be returned. Only if
            coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain
            phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates
            for the antennas in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and
            given time, reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and gains
            :type initp: ndarray, None

            :return: Selected fields (slct_keys) of the optimization result,
            and the initial parameters to use for the next solve (updated only
            if this solve succeeded)
            :rtype: tuple (dict, ndarray or None)
            """
            res_rel, initp_new = doRelCal(
                credg, obsvis, no_unq_bls, no_ants, coords=coords,
                distribution=distribution, noise=noise, norm_gains=True,
                logamp=logamp, tilt_reg=tilt_reg, gphase_reg=gphase_reg,
                ant_pos_arr=ant_pos_arr, initp=initp, return_initp=True,
                phase_reg_initp=phase_reg_initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_new
            return res_rel, initp

        def cal_RP(credg, distribution, no_unq_bls, no_ants, logamp,
                   tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCalRP: constrained
            minimizer (by reducing the number of parameters) using polar
            coordinates

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood {'gaussian', 'cauchy'}
            :type distribution: str
            :param no_unq_bls: Number of unique baselines (equivalently the
            number of redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters is
            taken, such that only positive solutions can be returned. Only if
            coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain
            phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates
            for the antennas in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and
            given time, reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and gains
            :type initp: ndarray, None

            :return: Selected fields (slct_keys) of the optimization result,
            and the initial parameters to use for the next solve (updated only
            if this solve succeeded)
            :rtype: tuple (dict, ndarray or None)
            """
            # ant_pos_arr must be the antenna positions; the gphase_reg flag
            # was previously passed here by mistake
            res_rel, initp_ = doRelCalRP(
                credg, obsvis, no_unq_bls, no_ants, distribution=distribution,
                noise=noise, constr_phase=True, amp_constr='prod',
                bounded=True, logamp=logamp, tilt_reg=tilt_reg,
                gphase_reg=gphase_reg, ant_pos_arr=ant_pos_arr, initp=initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_
            return res_rel, initp

        # bind everything that is constant across slices, leaving only
        # (obsvis, noise, initp) to be supplied per solve
        if args.method.upper() == 'RP':
            RelCal = functools.partial(cal_RP, cRedG, args.dist, no_unq_bls,
                                       no_ants, args.logamp, args.tilt_reg,
                                       args.gphase_reg, ant_pos_arr)
            coords = 'polar'
        else:
            RelCal = functools.partial(cal, cRedG, args.dist, args.method,
                                       no_unq_bls, no_ants, args.logamp,
                                       args.tilt_reg, args.gphase_reg,
                                       ant_pos_arr)
            coords = args.method

        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            with open(out_csv, 'a') as f:  # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                prev_chan_idx = None
                for iter_dim in iter_dims:
                    if args.initp_jd is not None:
                        initp = rel_df_c.loc[(freq_chans[iter_dim[0]],
                                              iter_dim[2])]\
                            [len(slct_keys[:-1]):-2].values.astype(float)
                    elif iter_dim[0] != prev_chan_idx:
                        # reset initp at the start of each frequency slice;
                        # keyed on the channel index so that it stays correct
                        # when solved or flagged slices were dropped from
                        # iter_dims
                        initp = None
                    prev_chan_idx = iter_dim[0]

                    if args.noise:
                        noisec = cNData[iter_dim[:2]]
                    res_rel, initp = RelCal(cData[iter_dim[:2]], noisec, initp)

                    # expanding out the solution
                    for j, param in enumerate(res_rel['x']):
                        res_rel[j] = param
                    del res_rel['x']
                    res_rel.update({indices[0]: freq_chans[iter_dim[0]],
                                    indices[1]: time_ints[iter_dim[1]]})
                    writer.writerow(res_rel)

        print('Relative calibration results saved to csv file {}'.format(
            out_csv))
        df = pd.read_csv(out_csv)
        if csv_exists:
            # the csv also holds rows from earlier runs, so the data must
            # cover every channel and integration it contains
            freqs = df['freq'].unique()
            tints = df['time_int'].unique()
            if cData.shape[0] != freqs.size or cData.shape[1] != tints.size:
                _, _, cData = group_data(zen_fn, args.pol, freqs, tints,
                                         bad_ants, flag_path=flag_fn)
                cData = cData.data
        df.set_index(indices, inplace=True)
        # we now append the residuals as additional columns
        df = append_residuals_rel(df, cData, cRedG, coords, out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        df.to_pickle(out_pkl)
        print('Relative calibration results dataframe pickled to {}'.format(
            out_pkl))

        # creating metadata file
        out_md = default_fn.rsplit('.', 1)[0] + '.md.pkl'
        if not os.path.exists(out_md):
            md = {'no_ants': no_ants, 'no_unq_bls': no_unq_bls, 'redg': RedG,
                  'antpos': hd.antpos, 'last': hd.lsts, 'Nfreqs': hd.Nfreqs,
                  'Ntimes': hd.Ntimes}
            with open(out_md, 'wb') as f:
                pickle.dump(md, f, protocol=pickle.HIGHEST_PROTOCOL)
            print('Relative calibration metadata pickled to {}\n'.format(
                out_md))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
def main():
    """Command-line entry point for absolute optimal calibration.

    Parses the command-line arguments, loads the relative calibration results
    dataframe and its metadata file for the requested JD, and solves the
    absolute optimal calibration for every requested (frequency channel, time
    integration) slice that exists in the relative calibration results. Each
    solution is appended as a row of a csv file as soon as it is found, so
    that an interrupted run can be resumed: slices already present in an
    existing csv (or, failing that, an existing pickled dataframe) are skipped
    unless ``--new_df`` is given. Once all slices are solved, the csv is read
    back, residuals are appended, and the result is pickled.

    The solver output printed to stdout during the solves is captured and
    discarded.

    :raises ValueError: If none of the specified channels and time
    integrations exist in the relative calibration results dataframe
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Absolute optimal calibration of relatively calibrated visibility solutions

    Takes the relatively calibrated visibility solutions of HERA data, and
    constrains their degenerate parameters, such that average amplitude of
    antenna gains is set to 1, the average phase of antenna gains is set to 0,
    the overall phase if set to 0 and the phase gradients are 0.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the absolute optimal calibration.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to '
                        'analyze', metavar='JD', type=str)
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None,
                        metavar='C', type=str,
                        help='Frequency channels to fit {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None,
                        metavar='T', type=str,
                        help='Time integrations to fit {0, 59}')
    parser.add_argument('-d', '--dist', required=True, metavar='D', type=str,
                        help='Fitting distribution for calibration '
                        '{"cauchy", "gaussian"}')
    parser.add_argument('-a', '--ref_ant_idx', required=False, default=16,
                        metavar='A', type=int,
                        help='Reference antenna index to set the overall '
                        'phase')
    parser.add_argument('-l', '--logamp', required=False, action='store_true',
                        help='Use logamp method to force positive gain amplitudes')
    parser.add_argument('-r', '--rel_dir', required=False, default=None,
                        metavar='R', type=str,
                        help='Directory in which relative calibration '
                        'results dataframes are located')
    parser.add_argument('-u', '--out_dir', required=False, default=None,
                        metavar='U', type=str,
                        help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new csv file')
    parser.add_argument('-k', '--compression', required=False, default=None,
                        metavar='K', type=str,
                        help='Compression to use when pickling results dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    out_fn = args.out
    if out_fn is None:
        out_fn = 'opt_df.{}.{}.{}'.format(args.jd_time, args.pol, args.dist)
    if args.out_dir is not None:
        # makedirs also creates missing parent directories and does not fail
        # if the directory already exists
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    # NOTE(review): the compression suffix is only added to out_pkl just
    # before writing, so a compressed pickle from an earlier run is not
    # detected here - confirm whether that is intended
    pkl_exists = os.path.exists(out_pkl)
    if (csv_exists or pkl_exists) and args.new_df:
        # write to fresh files instead of resuming from the existing ones
        out_csv = new_fn(out_csv, None, startTime)
        out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
        csv_exists = False
        pkl_exists = False

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    zen_fn = find_zen_file(args.jd_time)
    bad_ants = get_bad_ants(zen_fn)
    flag_fn = find_flag_file(args.jd_time, 'first')  # returns None if not found

    # NOTE: pickles are assumed to have been written by this pipeline
    rel_df_path = find_rel_df(args.jd_time, args.pol, args.dist, args.rel_dir)
    rel_df = pd.read_pickle(rel_df_path)

    # retrieving visibility metadata
    md_fn = 'rel_df.{}.{}.md.pkl'.format(args.jd_time, args.pol)
    if args.rel_dir is not None:
        md_fn = os.path.join(args.rel_dir, md_fn)
    with open(md_fn, 'rb') as f:
        md = pickle.load(f)

    # strings describing the selection, for the log message only
    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(md['Nfreqs'] - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(md['Ntimes'] - 1)
    print('Running absolute optimal calibration for visibility dataset {} '
          'for frequency channel(s) {} and time integration(s) {} '
          'with {} assumed noise distribution\n'.
          format(os.path.basename(zen_fn), pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(md['Nfreqs'])
    if time_ints is None:
        time_ints = numpy.arange(md['Ntimes'])

    # filter by specified channels and time integrations
    freq_flt = numpy.in1d(rel_df.index.get_level_values('freq'), freq_chans)
    tint_flt = numpy.in1d(rel_df.index.get_level_values('time_int'), time_ints)
    rel_df = rel_df[freq_flt & tint_flt]

    # only getting frequencies and time integrations that exist in the df
    freq_chans = rel_df.index.get_level_values('freq').unique().values
    time_ints = rel_df.index.get_level_values('time_int').unique().values

    indices = ['freq', 'time_int']

    # iter_dims holds (channel index, time index) positions into freq_chans
    # and time_ints, not the channel / integration numbers themselves
    no_tints = len(time_ints)
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    if not iter_dims:
        raise ValueError('No frequency channels or time integrations to '
                         'iterate over - check that the specified --chans and --tints exist '
                         'in the relative calibration results dataframes')

    skip_cal = False
    # skipping freqs and tints that are already in dataframe
    if csv_exists or pkl_exists:
        # maps from channel / integration number to its position
        cmap_f = {chan: idx for idx, chan in enumerate(freq_chans)}
        cmap_t = {tint: idx for idx, tint in enumerate(time_ints)}
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.reset_index()[indices].values
        done = {(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if f in cmap_f and t in cmap_t}
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not iter_dims:
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        hd, RedG, cData = group_data(zen_fn, args.pol, freq_chans, time_ints,
                                     bad_ants, flag_path=flag_fn)
        cData = cData.data

        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)

        # removing 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        no_deg_params = 4  # overall amplitude, overall phase, x & y phase gradients
        psize = no_ants*2 + no_deg_params + no_unq_bls*2
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        ant_pos_arr = flt_ant_pos(hd.antpos, ants)
        ant_sep = red_ant_sep(RedG, hd.antpos)

        def get_w_alpha(res_rel_vis, new_deg_params):
            """Apply degenerate parameters found from optimal absolute
            calibration to visibility solutions from relative redundant
            calibration

            :param res_rel_vis: Visibility solutions
            :type res_rel_vis: ndarray
            :param new_deg_params: Degenerate parameters from optimal
            calibration; elements 0, 2 and 3 are passed on to degVis
            :type new_deg_params: ndarray

            :return: Degenerately transformed visibility solutions
            :rtype: ndarray
            """
            return degVis(ant_sep, res_rel_vis, *new_deg_params[[0, 2, 3]])

        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            with open(out_csv, 'a') as f:  # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                prev_chan_idx = None
                for iter_dim in iter_dims:
                    # reset initp at the start of each frequency slice; keyed
                    # on the channel index so that it stays correct when
                    # already solved slices were dropped from iter_dims
                    if iter_dim[0] != prev_chan_idx:
                        initp = None
                    prev_chan_idx = iter_dim[0]

                    # get absolute optimal calibrated solutions
                    rel_idim = (freq_chans[iter_dim[0]], time_ints[iter_dim[1]])
                    res_rel_vis, _ = split_rel_results(
                        rel_df.loc[rel_idim][len(slct_keys)-1:-2]
                        .values.astype(float), no_unq_bls)
                    res_opt = doOptCal(cRedG, cData[iter_dim], no_ants,
                                       ant_pos_arr, ant_sep, res_rel_vis,
                                       distribution=args.dist,
                                       ref_ant_idx=args.ref_ant_idx,
                                       logamp=args.logamp, initp=initp)
                    res_opt = {key: res_opt[key] for key in slct_keys}

                    # get the new visibility solutions
                    w_alpha = get_w_alpha(res_rel_vis,
                                          res_opt['x'][-no_deg_params:])
                    w_alpha_comps = decomposeCArray(w_alpha)
                    all_params = numpy.append(res_opt['x'], w_alpha_comps)

                    # expanding out the solution
                    for j, param in enumerate(all_params):
                        res_opt[j] = param
                    # to use solution for next solve in iteration
                    if res_opt['success']:
                        initp = res_opt['x']
                    del res_opt['x']
                    res_opt.update({indices[0]: rel_idim[0],
                                    indices[1]: rel_idim[1]})
                    writer.writerow(res_opt)

        print('Absolute optimal calibration results saved to csv file {}'
              .format(out_csv))
        df = pd.read_csv(out_csv)
        df.set_index(indices, inplace=True)
        df = append_residuals_opt(df, cData, cRedG, out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'.format(
                args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Absolute optimal calibration results dataframe pickled to {}'
              .format(out_pkl))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
def main():
    """Command-line entry point for checking relative calibration results.

    Parses the command-line arguments, draws a random sample (without
    replacement) of (frequency channel, time integration) slices from the
    given relative calibration results dataframe, and re-runs relative
    calibration on each of them without reusing previous solutions as
    initial parameters. For each slice, the new negative log-likelihood and
    gain amplitudes are compared to the original ones at the given tolerance,
    and the outcome is stored in the 'loglkl_match' and 'gamp_match' columns.

    Results are written to a csv file, pickled as a dataframe, and summarized
    on stdout. If the pickled check dataframe already exists and
    ``--overwrite`` is not given, the check is not re-run and the existing
    results are summarized instead.

    The dataset JD, polarization and fitting distribution are taken from the
    dot-separated fields of the rel_df file name.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Check the relative redundant calibration of visibilities

    The relative calibration script rel_cal.py reuses the previous solution to
    initialize the next solver. While this greatly speeds up the code, there is
    the worry that the solver gets stuck in a local minimum. This script takes a
    random sample of frequency and time integration slices from a dataframe of
    results from relative calibration, and re-runs relative calibration on them
    with vanilla initial parameter guesses of 1+1j for the gains and 0+0j for the
    visibilities, and verifies if these results match with the original results
    by checking both their negative log-likelhoods and their gain amplitudes. We
    expect these to match, but we do not expect the gain and visibility solution
    phases to be equal, since there are still some additional degeneracies
    (overall phase & tilt shifts) that have not been accounted for.

    Returns a pickled pandas dataframe of the Scipy optimization results for the
    relative redundant calibration for each set of randomly chosen frequency
    channel and time integration slices chosen from the rel_df results dataframe
    """))
    parser.add_argument('rel_df', help='Relative calibration results dataframe '
                        'in pickle file format', metavar='df', type=str)
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-c', '--no_checks', required=False, default=50,
                        metavar='C', type=int, help='Number of checks')
    parser.add_argument('-t', '--tol', required=False, default=0.01,
                        metavar='T', type=float,
                        help='Tolerance for the negative log-likelihood of '
                        'relative calibration results to match')
    parser.add_argument('-w', '--overwrite', required=False,
                        action='store_true',
                        help='Overwrite existing check csv and dataframe')
    parser.add_argument('-r', '--rel_dir', required=False, default=None,
                        metavar='R', type=str,
                        help='Directory in which rel_dfs are stored')
    parser.add_argument('-u', '--out_dir', required=False, default=None,
                        metavar='U', type=str,
                        help='Out directory to store dataframe')
    parser.add_argument('-k', '--keep_csv', required=False, action='store_true',
                        help='Keep csv file')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    # the file name fields are read by position: fields 1 and 2 form the
    # fractional JD, followed by the polarization and the distribution
    sout = args.rel_df.split('.')
    jd_time = str('{}.{}'.format(sout[1], sout[2]))
    pol = sout[3]
    dist = sout[4]

    no_checks = args.no_checks

    out_fn = args.out
    if out_fn is None:
        out_fn = 'check_rel_df.{}.{}.{}'.format(jd_time, pol, dist)
    if args.out_dir is not None:
        # makedirs also creates missing parent directories and does not fail
        # if the directory already exists
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)

    out_csv = fn_format(out_fn, 'csv')
    if os.path.exists(out_csv) and args.overwrite:
        os.remove(out_csv)

    out_df = out_csv.rsplit('.', 1)[0] + '.pkl'
    df_exists = os.path.exists(out_df)
    if df_exists and args.overwrite:
        os.remove(out_df)
        df_exists = False

    match_keys = ['loglkl_match', 'gamp_match']

    if not df_exists:
        zen_fn = find_zen_file(jd_time)
        bad_ants = get_bad_ants(zen_fn)

        print('Checking the relative redundant calibration results for {}\n'.
              format(args.rel_df))

        if args.rel_dir is not None:
            rel_dir_path = os.path.join(args.rel_dir, args.rel_df)
        else:
            rel_dir_path = args.rel_df

        # NOTE: pickles are assumed to have been written by this pipeline
        rel_df = pd.read_pickle(rel_dir_path)
        no_checks = min(no_checks, len(rel_df.index))
        rnd_idxs = numpy.random.choice(rel_df.index.values, no_checks,
                                       replace=False)
        # only the sampled channels are loaded; fmap maps a channel number
        # to its position along the first axis of cData
        rnd_chans = numpy.unique([rnd_idx[0] for rnd_idx in rnd_idxs])
        fmap = {chan: idx for idx, chan in enumerate(rnd_chans)}

        _, RedG, cData = group_data(zen_fn, pol, rnd_chans, None, bad_ants)
        cData = cData.data

        # to get fields for the csv header
        no_ants = numpy.unique(RedG[:, 1:]).size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants + no_unq_bls) * 2

        indices = ['freq', 'time_int']
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + match_keys + list(numpy.arange(psize)) \
            + indices
        # number of leading non-parameter columns in rel_df
        no_res_cols = len(slct_keys) - 1

        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            # the header is always written, so the file is truncated rather
            # than appended to: appending to a csv left over from an earlier
            # run would put a second header row in the middle of the data
            with open(out_csv, 'w') as f:
                writer = DictWriter(f, fieldnames=header)
                writer.writeheader()
                for iter_dim in rnd_idxs:
                    res_rel = doRelCal(cRedG,
                                       cData[fmap[iter_dim[0]], iter_dim[1]],
                                       no_unq_bls, no_ants, coords='cartesian',
                                       distribution=dist, norm_gains=True)
                    res_rel = {key: res_rel[key] for key in slct_keys}

                    # checking results
                    orig_row = rel_df.loc[iter_dim]
                    res_rel[match_keys[0]] = numpy.abs(norm_residuals(
                        orig_row['fun'], res_rel['fun'])) < args.tol
                    res_gamp = numpy.abs(split_rel_results(
                        orig_row[no_res_cols:-2].values.astype(float),
                        no_unq_bls)[1])
                    check_gamp = numpy.abs(split_rel_results(
                        res_rel['x'], no_unq_bls)[1])
                    res_rel[match_keys[1]] = (numpy.abs(norm_residuals(
                        res_gamp, check_gamp)) < args.tol).all()

                    # expanding out the solution
                    for i, param in enumerate(res_rel['x']):
                        res_rel[i] = param
                    del res_rel['x']
                    res_rel.update({indices[0]: iter_dim[0],
                                    indices[1]: iter_dim[1]})
                    writer.writerow(res_rel)

        df = pd.read_csv(out_csv)
        df.set_index(indices, inplace=True)
        df.sort_values(by=indices, inplace=True)
        df.to_pickle(out_df)
        if not args.keep_csv:
            os.remove(out_csv)
        else:
            print('Checked relative calibration results saved to csv file {}'.
                  format(out_csv))
        print('Checked relative calibration results dataframe pickled to {}\n'.
              format(out_df))

    else:
        df = pd.read_pickle(out_df)
        print('Checked relative calibration results already exists in {} - '
              'specify --overwrite as an argument to perform check again.\n'.
              format(out_df))
        no_checks = len(df.index)

    # a slice only counts as matching if both checks pass, so that the
    # percentage and the list of mismatches agree with all_match
    matches = df[match_keys].values
    row_match = matches.all(axis=1)
    all_match = matches.all()
    if all_match:
        pmatch = 'All'
    else:
        pmatch = '{}% of'.format(
            round(100 * numpy.sum(row_match) / matches.shape[0], 2))
    print('{} iterations from the {} randomly selected frequency and time '
          'slices match the original results at a tolerance of {}%.\n'.
          format(pmatch, no_checks, args.tol*100))
    if not all_match:
        print('Mismatched iterations are {}\n'.format(
            df[~row_match].index.values))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
def main():
    """Command-line entry point: relative redundant calibration across days.

    Parses the command-line arguments, loads the visibilities for the
    requested JDs through ``XDgroup_data``, solves every remaining
    (frequency channel, time integration) slice with ``doRelCalD`` and
    appends each solution as a row of a csv file. The csv is then read
    back, residuals are appended with ``append_residuals_rel`` and the
    resulting dataframe is pickled, together with a metadata pickle if one
    does not exist yet.

    Slices already present in an existing csv / pickle output are skipped,
    unless ``--new_df`` is given, in which case new output files are used.

    Raises:
        SystemExit: from argparse (``--help`` or invalid arguments), or when
            every requested channel is flagged.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Across days relative redundant calibration of visibilities

    Takes HERA visibility datasets across several JDs in uvh5 file format,
    aligns them in LAST and then performs relative redundant calibration
    (up to the overall amplitude, overall phase, and phase gradient degenerate
    parameters) for each frequency channel and each time integration in the
    dataset.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the relative redundant calibration for each set of frequency channel and
    time integration.
    """))
    parser.add_argument('jd_time', metavar='JD', type=str,
                        help='Fractional JD time of dataset to align other '
                             'dataframes to')
    parser.add_argument('-j', '--jds', required=True, metavar='J', type=str,
                        help='JDs to calibrate')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None,
                        metavar='C', type=str,
                        help='Frequency channels to calibrate {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None,
                        metavar='T', type=str,
                        help='Time integrations to calibrate {0, 59}')
    parser.add_argument('-f', '--flag_type', required=False, default='first',
                        metavar='F', type=str,
                        help='Flag type e.g. "first", "omni", "abs"')
    parser.add_argument('-d', '--dist', required=True, metavar='D', type=str,
                        help='Fitting distribution for calibration '
                             '{"cauchy", "gaussian"}')
    parser.add_argument('-v', '--noise', required=False, action='store_true',
                        help='Use noise from autos in nlogL calculations')
    # argparse %-formats help strings, so a literal percent sign must be
    # written as '%%'; a bare '%' makes --help raise instead of printing.
    parser.add_argument('-cf', '--chan_flag_pct', required=False,
                        default=None, metavar='CFP', type=float,
                        help='Flag channel if more than X%% of day/time '
                             'slices for a given channel are flagged')
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str,
                        help='Output csv and df name')
    parser.add_argument('-u', '--out_dir', required=False, default=None,
                        metavar='U', type=str,
                        help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False,
                        action='store_true',
                        help='Write data to a new dataframe')
    parser.add_argument('-k', '--compression', required=False, default=None,
                        metavar='K', type=str,
                        help='Compression to use when pickling results '
                             'dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    zen_fn = find_zen_file(args.jd_time)
    hd = HERAData(zen_fn)

    # Resolve output file names; the default name is built from the first
    # LAST of the reference dataset, the polarization and the distribution.
    default_fn = 'xd_rel_df.{}.{}.{}'.format('{:.4f}'.format(hd.lsts[0]),
                                             args.pol, args.dist)
    out_fn = args.out if args.out is not None else default_fn
    if args.out_dir is not None:
        # makedirs also copes with nested paths and an existing directory
        os.makedirs(args.out_dir, exist_ok=True)
        out_fn = os.path.join(args.out_dir, out_fn)
        default_fn = os.path.join(args.out_dir, default_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if (csv_exists or pkl_exists) and args.new_df:
        # write to fresh files instead of resuming from the existing ones
        out_csv = new_fn(out_csv, None, startTime)
        out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
        csv_exists = False
        pkl_exists = False

    # JDs are given either as a named preset, as an underscore separated
    # list, or in whatever format mod_str_arg parses.
    JDs = args.jds
    if JDs == 'idr2_jds':
        JDs = numpy.asarray(idr2_jds)
    elif JDs == 'idr2_jdsx':
        JDs = numpy.asarray(idr2_jdsx)
    else:
        if '_' in JDs:
            JDs = numpy.asarray(JDs.split('_'), dtype=int)
        else:
            JDs = mod_str_arg(JDs)
        # NOTE(review): nesting reconstructed - explicitly specified JDs are
        # restricted to those in idr2_jds; confirm against the original.
        JDs = numpy.intersect1d(JDs, idr2_jds)

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(hd.Nfreqs - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(hd.Ntimes - 1)
    print('Running relative redundant calibration across JDs {} between LASTS '
          '{:.4f} and {:.4f} for polarization {}, frequency channel(s) {} '
          'and time integration(s) {}, with {} assumed noise distribution.\n'.
          format(' '.join(map(str, JDs)), hd.lsts[0], hd.lsts[-1], args.pol,
                 pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(hd.Nfreqs)
    if time_ints is None:
        time_ints = numpy.arange(hd.Ntimes)

    indices = ['freq', 'time_int']

    # iter_dims holds (channel index, time integration index) pairs that
    # index into freq_chans / time_ints, not the channel numbers themselves
    no_tints = len(time_ints)
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    skip_cal = False

    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        cmap_f = dict(map(reversed, enumerate(freq_chans)))
        cmap_t = dict(map(reversed, enumerate(time_ints)))
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        else:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.index.values
        # a set keeps the membership test below O(1) per slice
        done = {(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if (f in freq_chans and t in time_ints)}
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not iter_dims:
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            grp = XDgroup_data(args.jd_time, JDs, args.pol, chans=freq_chans,
                               tints=time_ints, use_flags=args.flag_type,
                               noise=args.noise)
        noisec = None
        cNData = None
        if not args.noise:
            _, RedG, cData = grp
        else:
            _, RedG, cData, cNData = grp

        # The mask may be a single boolean when all flags are the same;
        # broadcasting gives an array of the data shape in either case so
        # the axis reductions below always work.
        flags = numpy.broadcast_to(cData.mask, cData.shape)
        cData = cData.data

        # to get fields for the csv header
        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants * JDs.size + no_unq_bls) * 2

        # discarding 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        # remove flagged channels from iter_dims; the data are indexed as
        # [day, channel, time integration, baseline] in the solve loop below
        if flags.any():
            if args.chan_flag_pct is None:
                flg_chans = numpy.unique(
                    numpy.where(flags.all(axis=(0, 2, 3)))[0])
                print('Flagged channels across all days are: {}\n'.
                      format(freq_chans[flg_chans]))
            else:
                flg_pct = args.chan_flag_pct / 100
                flg_chans = numpy.unique(numpy.where(
                    flags.all(axis=3).mean(axis=(0, 2)) > flg_pct)[0])
                print('Flagged channels across all days and those that are '
                      'more than {}% flagged for their given day/time slice '
                      'are: {}\n'.format(args.chan_flag_pct,
                                         freq_chans[flg_chans]))
            flagged = set(flg_chans.tolist())
            iter_dims = [idim for idim in iter_dims
                         if idim[0] not in flagged]
            if not iter_dims:  # check if slices to solve are empty
                print('All specified channels are flagged. Exiting.')
                sys.exit()

        def cal(credg, distribution, no_unq_bls, no_ants, obsvis, noise,
                initp):
            """Relative redundant calibration across days with doRelCalD:
            default implementation with unconstrained minimizer using
            cartesian coordinates.

            Returns the selected fields of the optimization result and the
            initial parameters to use for the next solve.
            """
            res_rel, initp_new = doRelCalD(credg, obsvis, no_unq_bls,
                                           no_ants,
                                           distribution=distribution,
                                           noise=noise, initp=initp,
                                           return_initp=True, xd=True)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_new
            return res_rel, initp

        RelCal = functools.partial(cal, cRedG, args.dist, no_unq_bls,
                                   no_ants)

        with redirect_stdout(stdout):  # suppress output
            with open(out_csv, 'a') as f:  # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                prev_chan_idx = None
                for chan_idx, tint_idx in iter_dims:
                    # Reset initp at the start of each frequency slice.
                    # Tracking the channel index (rather than counting
                    # iterations) stays correct when already solved slices
                    # were removed from iter_dims.
                    if chan_idx != prev_chan_idx:
                        initp = None
                        prev_chan_idx = chan_idx
                    if args.noise:
                        noisec = cNData[:, chan_idx, tint_idx, :]
                    res_rel, initp = RelCal(
                        cData[:, chan_idx, tint_idx, :], noisec, initp)
                    # expanding out the solution: one column per parameter
                    res_rel.update(enumerate(res_rel.pop('x')))
                    res_rel.update({indices[0]: freq_chans[chan_idx],
                                    indices[1]: time_ints[tint_idx]})
                    writer.writerow(res_rel)

        print('Relative calibration results saved to csv file {}'.format(
            out_csv))
        df = pd.read_csv(out_csv)
        if csv_exists:
            # The csv also holds rows from earlier runs; reload the data if
            # its frequency / time axes (axes 1 and 2, axis 0 being the day)
            # do not match the slices present in the csv.
            freqs = df['freq'].unique()
            tints = df['time_int'].unique()
            if cData.shape[1] != freqs.size or cData.shape[2] != tints.size:
                _, _, cData = XDgroup_data(args.jd_time, JDs, args.pol,
                                           chans=freqs, tints=tints,
                                           use_flags=args.flag_type,
                                           noise=None)
                cData = cData.data
        df.set_index(indices, inplace=True)
        # we now append the residuals as additional columns
        df = append_residuals_rel(df, cData, cRedG, 'cartesian', out_fn=None)
        if pkl_exists and not csv_exists:
            # merge with the solutions of the previously pickled dataframe
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'.format(
                args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Relative calibration results dataframe pickled to {}'.format(
            out_pkl))

        # creating metadata file
        out_md = default_fn.rsplit('.', 1)[0] + '.md.pkl'
        if not os.path.exists(out_md):
            md = {'no_ants': no_ants, 'no_unq_bls': no_unq_bls,
                  'redg': RedG, 'antpos': hd.antpos, 'last': hd.lsts,
                  'Nfreqs': hd.Nfreqs, 'Ntimes': hd.Ntimes, 'JDs': JDs}
            with open(out_md, 'wb') as f:
                pickle.dump(md, f, protocol=pickle.HIGHEST_PROTOCOL)
            print('Relative calibration metadata pickled to {}\n'.format(
                out_md))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))