def __init__(self, dataFileList, reds, fileformat='miriad'):
    """Initialize an AntennaMetrics object.

    Arguments:
        dataFileList: List of data filenames of the four different visibility
                      polarizations for the same observation.
        reds: List of lists of tuples of antenna numbers that make up
              redundant baseline groups.
        fileformat: File type of data.
                    Supports: 'miriad', 'uvfits', 'fhd', 'ms' (see pyuvdata docs).
                    Default: 'miriad'.
    """
    from hera_cal.io import HERAData

    if fileformat == 'miriad':
        self.hd = HERAData(dataFileList, filetype='miriad')
    elif fileformat == 'uvfits':
        self.hd = HERAData(dataFileList, filetype='uvfits')
    elif fileformat == 'fhd':
        raise NotImplementedError(str(fileformat) + ' not supported')
    else:
        raise ValueError('Unrecognized file format ' + str(fileformat))
    self.data, self.flags, self.nsamples = self.hd.read()
    self.ants = self.hd.get_ants()
    self.pols = [pol.lower() for pol in self.hd.get_pols()]
    self.antpols = [antpol.lower() for antpol in self.hd.get_feedpols()]
    self.bls = self.hd.get_antpairs()
    self.dataFileList = dataFileList
    self.reds = reds
    self.version_str = hera_qm_version_str
    self.history = ''

    if len(self.antpols) != 2 or len(self.pols) != 4:
        raise ValueError('Missing polarization information. pols = '
                         + str(self.pols) + ' and antpols = '
                         + str(self.antpols))
def generate_residual_IDR2_2(uvh5_file, omni_vis, omni_calfits, abs_calfits,
                             outfile, clobber=False):
    # reading uvh5 data file
    hd = HERAData(uvh5_file)
    data, flags, nsamples = hd.read(polarizations=['ee', 'nn'])

    # reading omnical model visibilities
    hd_oc = HERAData(omni_vis)
    omnivis, omnivis_flags, _ = hd_oc.read()

    uvo = pyuvdata.UVData()
    uvo.read_uvh5(omni_vis)

    # reading calfits file
    hc = HERACal(omni_calfits)
    oc_gains, oc_flags, oc_quals, oc_total_quals = hc.read()

    hc = HERACal(abs_calfits)
    ac_gains, ac_flags, ac_quals, ac_total_quals = hc.read()

    # calibrating the data
    abscal_data, abscal_flags = copy.deepcopy(data), copy.deepcopy(flags)
    calibrate_in_place(abscal_data, ac_gains, data_flags=abscal_flags,
                       cal_flags=ac_flags)

    res_data, res_flags = copy.deepcopy(hd.data_array), \
        copy.deepcopy(hd.flag_array)
    resdata, resflags = copy.deepcopy(abscal_data), copy.deepcopy(abscal_flags)

    for i, p in enumerate(['ee', 'nn']):
        # reading omnical model visibilities for this polarization
        hd_oc = HERAData(omni_vis)
        omnivis, omnivis_flags, _ = hd_oc.read(polarizations=[p])
        mod_bls = list(omnivis.keys())
        red_bls = get_reds(hd.antpos, pols=p)
        red = gr.RBL(red_bls)
        for mbl in mod_bls:
            bl_grp = red[tuple(mbl[0:2]) + ('J{}'.format(p),)]
            for blp in bl_grp:
                bl = (blp[0], blp[1], p)
                inds = hd.antpair2ind(bl)
                omnivis_scaled = omnivis[mbl] \
                    * oc_gains[(blp[0], 'J{}'.format(p))] \
                    * np.conj(oc_gains[(blp[1], 'J{}'.format(p))])
                omnivis_scaled /= (ac_gains[(blp[0], 'J{}'.format(p))]
                                   * np.conj(ac_gains[(blp[1], 'J{}'.format(p))]))
                resdata[bl] = abscal_data[bl] - omnivis_scaled
                resflags[bl] = abscal_flags[bl]
                res_data[inds, 0, :, i] = resdata[bl]
                res_flags[inds, 0, :, i] = resflags[bl]

    # writing to file
    hd.data_array = res_data
    hd.flag_array = res_flags
    hd.write_uvh5(outfile, clobber=clobber)
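# Usage sketch (hypothetical): all file paths below are illustrative
# placeholders for one IDR2.2 observation, not files shipped with the code.
generate_residual_IDR2_2(
    uvh5_file='zen.2458098.43869.HH.uvh5',
    omni_vis='zen.2458098.43869.HH.omni_vis.uvh5',
    omni_calfits='zen.2458098.43869.HH.omni.calfits',
    abs_calfits='zen.2458098.43869.HH.abs.calfits',
    outfile='zen.2458098.43869.HH.res.uvh5',
    clobber=True)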
def reds_from_file(filename, vis_format='miriad'):
    """Get the redundant baseline pairs from a file.

    This is a wrapper around hera_cal.redcal.get_pos_reds that doesn't read
    the data file if it's possible to only read metadata.

    Parameters
    ----------
    filename : str
        The file to get reds from.
    vis_format : {'miriad', 'uvh5', 'uvfits', 'fhd', 'ms'}, optional
        Format of the data file. Default is 'miriad'.

    Returns
    -------
    reds : list of lists of tuples
        Each tuple represents antenna pairs. These are compiled in a list
        within a redundant group, and the outer list is all the redundant
        groups. See hera_cal.redcal.get_pos_reds.

    """
    from hera_cal.io import HERAData
    from hera_cal.redcal import get_pos_reds

    hd = HERAData(filename, filetype=vis_format)
    if hd.antpos is None:
        reds = get_pos_reds(hd.read()[0].antpos)
    else:
        reds = get_pos_reds(hd.antpos)
    del hd
    return reds
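# Usage sketch: mirrors the unit test further below; for a uvh5 file only
# the metadata is read.
reds = reds_from_file(os.path.join(DATA_PATH, 'zen.2457698.40355.xx.HH.uvh5'),
                      vis_format='uvh5')
print(len(reds), 'redundant groups')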
def __init__(self, dataFileList, reds, fileformat='miriad'):
    """Initialize an AntennaMetrics object.

    Parameters
    ----------
    dataFileList : list of str
        List of data filenames of the four different visibility
        polarizations for the same observation.
    reds : list of lists of tuples of ints
        List of lists of tuples of antenna numbers that make up redundant
        baseline groups.
    fileformat : str, optional
        File type of data. Must be one of: 'miriad', 'uvh5', 'uvfits',
        'fhd', 'ms' (see pyuvdata docs). Default is 'miriad'.

    Attributes
    ----------
    hd : HERAData
        HERAData object generated from dataFileList.
    data : array
        Data contained in HERAData object.
    flags : array
        Flags contained in HERAData object.
    nsamples : array
        Nsamples contained in HERAData object.
    ants : list of ints
        List of antennas in HERAData object.
    pols : list of str
        List of polarizations in HERAData object.
    antpols : list of str
        List of antenna polarizations in HERAData object.
    bls : list of tuples of ints
        List of baselines (antenna pairs) in HERAData object.
    dataFileList : list of str
        List of data filenames of the four different visibility
        polarizations for the same observation.
    reds : list of lists of tuples of ints
        List of lists of tuples of antenna numbers that make up redundant
        baseline groups.
    version_str : str
        The version of the hera_qm module used to generate these metrics.
    history : str
        History to append to the metrics files when writing out files.

    """
    from hera_cal.io import HERAData

    self.hd = HERAData(dataFileList, filetype=fileformat)
    self.data, self.flags, self.nsamples = self.hd.read()
    self.ants = self.hd.get_ants()
    self.pols = [pol.lower() for pol in self.hd.get_pols()]
    self.antpols = [antpol.lower() for antpol in self.hd.get_feedpols()]
    self.bls = self.hd.get_antpairs()
    self.dataFileList = dataFileList
    self.reds = reds
    self.version_str = hera_qm_version_str
    self.history = ''

    if len(self.antpols) != 2 or len(self.pols) != 4:
        raise ValueError('Missing polarization information. pols = '
                         + str(self.pols) + ' and antpols = '
                         + str(self.antpols))
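# Usage sketch (hypothetical): the four-polarization miriad file names are
# illustrative placeholders for a single observation.
files = ['zen.2457698.40355.{}.HH.uvcA'.format(pol)
         for pol in ('xx', 'yy', 'xy', 'yx')]
reds = reds_from_file(files[0], vis_format='miriad')
am = AntennaMetrics(files, reds, fileformat='miriad')
print(am.ants, am.pols, am.antpols)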
def test_init_HERAData():
    uv = UVData()
    uv.read_miriad(test_d_file)
    uvf1 = UVFlag(uv)
    hd = HERAData(test_d_file, filetype='miriad')
    hd.read()
    uvf2 = UVFlag(hd)
    nt.assert_equal(uvf1, uvf2)
def test_reds_from_file_no_read_file():
    from hera_cal.io import HERAData
    from hera_cal.redcal import get_pos_reds

    # uvh5 file will not need to be read in
    testfile = os.path.join(DATA_PATH, 'zen.2457698.40355.xx.HH.uvh5')
    reds = ant_metrics.reds_from_file(testfile, vis_format='uvh5')
    assert len(reds) > 1
    hd = HERAData(testfile, filetype='uvh5')
    reds_check = get_pos_reds(hd.antpos)
    assert reds == reds_check
def calfits_to_flags(JD_time, cal_type, pol='ee', add_bad_ants=None):
    """Returns flags array from calfits file

    :param JD_time: Fractional Julian date
    :type JD_time: float, str
    :param cal_type: Calibration process that produced the calfits file
    {"first", "omni", "abs", "flagged_abs", "smooth_abs"}
    :type cal_type: str
    :param pol: Polarization of data
    :type pol: str
    :param add_bad_ants: Additional bad antennas
    :type add_bad_ants: None, int, list, ndarray

    :return: Flags array
    :rtype: ndarray
    """
    zen_fn = find_zen_file(JD_time)
    flags_fn = find_flag_file(JD_time, cal_type)

    bad_ants = get_bad_ants(zen_fn)
    if add_bad_ants is not None:
        bad_ants = numpy.sort(numpy.append(bad_ants,
                                           numpy.array(add_bad_ants)))

    hc = HERACal(flags_fn)
    _, cal_flags, _, _ = hc.read()

    hd = HERAData(zen_fn)
    reds = get_reds(hd.antpos, pols=[pol])
    reds = fltBad(reds, bad_ants)
    redg = groupBls(reds)

    antpairs = redg[:, 1:]
    cflag = numpy.empty((hd.Nfreqs, hd.Ntimes, redg.shape[0]), dtype=bool)
    for g in range(redg.shape[0]):
        # flag a baseline if either of its antennas is flagged
        flags_a = cal_flags[(int(antpairs[g, 0]), 'J{}'.format(pol))]
        flags_b = cal_flags[(int(antpairs[g, 1]), 'J{}'.format(pol))]
        cflag[:, :, g] = numpy.logical_or(flags_a, flags_b).transpose()
    return cflag
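# Usage sketch (hypothetical): the fractional JD and the extra bad antennas
# are illustrative placeholders.
cflag = calfits_to_flags('2458098.43869', 'smooth_abs', pol='ee',
                         add_bad_ants=[0, 50])
print(cflag.shape)  # (Nfreqs, Ntimes, no. of redundant baselines)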
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Relative redundant calibration of visibilities

    Takes a given HERA visibility dataset in uvh5 file format and performs
    relative redundant calibration (up to the overall amplitude, overall
    phase, and phase gradient degenerate parameters) for each frequency
    channel and each time integration in the dataset.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the relative redundant calibration for each set of frequency channel and
    time integration.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to '
                        'calibrate', metavar='JD', type=str)
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None,
                        metavar='C', type=str,
                        help='Frequency channels to calibrate {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None,
                        metavar='T', type=str,
                        help='Time integrations to calibrate {0, 59}')
    parser.add_argument('-f', '--flag_type', required=False, default='first',
                        metavar='F', type=str,
                        help='Flag type e.g. "first", "omni", "abs"')
    parser.add_argument('-d', '--dist', required=True, metavar='D', type=str,
                        help='Fitting distribution for calibration '
                        '{"cauchy", "gaussian"}')
    parser.add_argument('-m', '--method', required=False, default='cartesian',
                        metavar='M', type=str,
                        help='Method to use - {"cartesian", "polar", "RP"}, '
                        'where RP stands for reduced parameters')
    parser.add_argument('-l', '--logamp', required=False, action='store_true',
                        help='Use logamp method to force positive gain '
                        'amplitudes')
    parser.add_argument('-g', '--tilt_reg', required=False,
                        action='store_true',
                        help='Add regularization term to constrain tilt '
                        'shifts to 0')
    parser.add_argument('-a', '--gphase_reg', required=False,
                        action='store_true',
                        help='Add regularization term to constrain the gain '
                        'phase mean')
    parser.add_argument('-i', '--initp_jd', required=False, default=None,
                        metavar='I', type=int,
                        help='JD to find datasets to reuse initial parameters')
    parser.add_argument('-v', '--noise', required=False, action='store_true',
                        help='Use noise from autos in nlogL calculations')
    parser.add_argument('-u', '--out_dir', required=False, default=None,
                        metavar='U', type=str,
                        help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    out_fn = args.out
    default_fn = 'rel_df.{}.{}.{}'.format(args.jd_time, args.pol, args.dist)
    if out_fn is None:
        out_fn = default_fn
    if args.out_dir is not None:
        if not os.path.exists(args.out_dir):
            os.mkdir(args.out_dir)
        out_fn = os.path.join(args.out_dir, out_fn)
        if out_fn is not None:
            default_fn = os.path.join(args.out_dir, default_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if csv_exists or pkl_exists:
        if args.new_df:
            out_csv = new_fn(out_csv, None, startTime)
            out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
            csv_exists = False
            pkl_exists = False

    zen_fn = find_zen_file(args.jd_time)
    bad_ants = get_bad_ants(zen_fn)

    flag_type = args.flag_type
    if flag_type is not None:
        flag_fn = find_flag_file(args.jd_time, flag_type)
    else:
        flag_fn = None

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    hd = HERAData(zen_fn)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(hd.Nfreqs - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(hd.Ntimes - 1)
    print('Running relative redundant calibration on visibility dataset {} '
          'for polarization {}, frequency channel(s) {} and time '
          'integration(s) {} with {} assumed noise distribution\n'
          .format(os.path.basename(zen_fn), args.pol, pchans, ptints,
                  args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(hd.Nfreqs)
    if time_ints is None:
        time_ints = numpy.arange(hd.Ntimes)

    indices = ['freq', 'time_int']

    no_tints = len(time_ints)
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    skip_cal = False
    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        cmap_f = dict(map(reversed, enumerate(freq_chans)))
        cmap_t = dict(map(reversed, enumerate(time_ints)))
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.index.values
        done = [(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if (f in freq_chans and t in time_ints)]
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not any(iter_dims):
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        grp = group_data(zen_fn, args.pol, freq_chans, time_ints, bad_ants,
                         flag_path=flag_fn, noise=args.noise)
        if not args.noise:
            _, RedG, cData = grp
            noisec = None
        else:
            _, RedG, cData, cNData = grp

        flags = cData.mask
        cData = cData.data

        # to get fields for the csv header
        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants + no_unq_bls) * 2
        if args.tilt_reg:
            ant_pos_arr = flt_ant_pos(hd.antpos, ants)
        else:
            ant_pos_arr = None

        # discarding 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        # remove flagged channels from iter_dims
        if True in flags:
            flg_chans = numpy.where(flags.all(axis=(1, 2)))[0]  # indices
            print('Flagged channels for visibility dataset {} are: {}\n'
                  .format(os.path.basename(zen_fn), freq_chans[flg_chans]))
            iter_dims = [idim for idim in iter_dims
                         if idim[0] not in flg_chans]
            if not iter_dims:  # check if slices to solve are empty
                print('All specified channels are flagged. Exiting.')
                sys.exit()

        if args.initp_jd is not None:
            jd_time2 = match_lst(args.jd_time, args.initp_jd)
            if len(str(jd_time2)) < 13:
                # add a trailing 0 that is omitted in float
                jd_time2 = str(jd_time2) + '0'
            rel_df_path1 = find_rel_df(jd_time2, args.pol, args.dist)
            if isinstance(jd_time2, str):
                jd_time2 = float(jd_time2)

            last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
            last1 = last_df[last_df['JD_time'] ==
                            float(args.jd_time)]['LASTs'].values[0]
            last2 = last_df[last_df['JD_time'] == jd_time2]['LASTs'].values[0]
            _, offset = find_nearest(last2, last1[0])

            rel_df1 = pd.read_pickle(rel_df_path1)
            rel_df1 = rel_df1[rel_df1.index.get_level_values('time_int')
                              >= offset]

            next_row = numpy.where(last_df['JD_time'] == jd_time2)[0][0] + 1
            rel_df_path2 = find_rel_df(last_df.iloc[next_row]['JD_time'],
                                       args.pol, args.dist)
            rel_df2 = pd.read_pickle(rel_df_path2)
            rel_df2 = rel_df2[rel_df2.index.get_level_values('time_int')
                              < offset]

            rel_df_c = pd.concat([rel_df1, rel_df2])

            # filter by specified channels and time integrations
            time_ints_offset = (time_ints + offset) % hd.Ntimes
            freq_flt = numpy.in1d(rel_df_c.index.get_level_values('freq'),
                                  freq_chans)
            tint_flt = numpy.in1d(rel_df_c.index.get_level_values('time_int'),
                                  time_ints_offset)
            rel_df_c = rel_df_c[freq_flt & tint_flt]

            time_ints2 = numpy.tile(
                rel_df_c.index.get_level_values('time_int').unique().values,
                freq_chans.size)
            iter_dims = [idim + (tint,) for idim, tint in
                         zip(iter_dims, time_ints2)]

            phase_reg_initp = True
        else:
            phase_reg_initp = False

        def cal(credg, distribution, coords, no_unq_bls, no_ants, logamp,
                tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCal: unconstrained
            minimizer using cartesian coordinates - this is the fastest solver

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood
            {'gaussian', 'cauchy'}
            :type distribution: str
            :param coords: Coordinate system in which gain and visibility
            parameters have been set up
            :type coords: str {"cartesian", "polar"}
            :param no_unq_bls: Number of unique baselines (equivalently the
            number of redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters
            is taken, such that only positive solutions can be returned.
            Only if coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts
            to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain
            phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates
            for the antennas in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and
            given time, reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and
            gains
            :type initp: ndarray, None

            :return: Optimization result for the solved antenna gains and true
            sky visibilities
            :rtype: Scipy optimization result object
            """
            res_rel, initp_new = doRelCal(
                credg, obsvis, no_unq_bls, no_ants, coords=coords,
                distribution=distribution, noise=noise, norm_gains=True,
                logamp=logamp, tilt_reg=tilt_reg, gphase_reg=gphase_reg,
                ant_pos_arr=ant_pos_arr, initp=initp, return_initp=True,
                phase_reg_initp=phase_reg_initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_new
            return res_rel, initp

        def cal_RP(credg, distribution, no_unq_bls, no_ants, logamp,
                   tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCalRP: constrained
            minimizer (by reducing the number of parameters) using polar
            coordinates

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood
            {'gaussian', 'cauchy'}
            :type distribution: str
            :param no_unq_bls: Number of unique baselines (equivalently the
            number of redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters
            is taken, such that only positive solutions can be returned.
            Only if coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts
            to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain
            phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates
            for the antennas in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and
            given time, reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and
            gains
            :type initp: ndarray, None

            :return: Optimization result for the solved antenna gains and true
            sky visibilities
            :rtype: Scipy optimization result object
            """
            res_rel, initp_ = doRelCalRP(
                credg, obsvis, no_unq_bls, no_ants,
                distribution=distribution, noise=noise, constr_phase=True,
                amp_constr='prod', bounded=True, logamp=logamp,
                tilt_reg=tilt_reg, gphase_reg=gphase_reg,
                ant_pos_arr=ant_pos_arr, initp=initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_
            return res_rel, initp

        if args.method.upper() == 'RP':
            RelCal = functools.partial(cal_RP, cRedG, args.dist, no_unq_bls,
                                       no_ants, args.logamp, args.tilt_reg,
                                       args.gphase_reg, ant_pos_arr)
            coords = 'polar'
        else:
            RelCal = functools.partial(cal, cRedG, args.dist, args.method,
                                       no_unq_bls, no_ants, args.logamp,
                                       args.tilt_reg, args.gphase_reg,
                                       ant_pos_arr)
            coords = args.method

        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            with open(out_csv, 'a') as f:  # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                for i, iter_dim in enumerate(iter_dims):
                    if args.initp_jd is not None:
                        initp = rel_df_c.loc[(freq_chans[iter_dim[0]],
                                              iter_dim[2])][
                            len(slct_keys[:-1]):-2].values.astype(float)
                    if args.noise:
                        noisec = cNData[iter_dim[:2]]
                    res_rel, initp = RelCal(cData[iter_dim[:2]], noisec, initp)
                    # expanding out the solution
                    for j, param in enumerate(res_rel['x']):
                        res_rel[j] = param
                    # reset initp after each frequency slice
                    if not (i + 1) % no_tints and args.initp_jd is None:
                        initp = None
                    del res_rel['x']
                    res_rel.update({indices[0]: freq_chans[iter_dim[0]],
                                    indices[1]: time_ints[iter_dim[1]]})
                    writer.writerow(res_rel)

        print('Relative calibration results saved to csv file {}'
              .format(out_csv))
        df = pd.read_csv(out_csv)
        if csv_exists:
            freqs = df['freq'].unique()
            tints = df['time_int'].unique()
            if cData.shape[0] != freqs.size or cData.shape[1] != tints.size:
                _, _, cData = group_data(zen_fn, args.pol, freqs, tints,
                                         bad_ants, flag_path=flag_fn)
                cData = cData.data
        df.set_index(indices, inplace=True)
        # we now append the residuals as additional columns
        df = append_residuals_rel(df, cData, cRedG, coords, out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        df.to_pickle(out_pkl)
        print('Relative calibration results dataframe pickled to {}'
              .format(out_pkl))

        # creating metadata file
        out_md = default_fn.rsplit('.', 1)[0] + '.md.pkl'
        if not os.path.exists(out_md):
            md = {'no_ants': no_ants, 'no_unq_bls': no_unq_bls, 'redg': RedG,
                  'antpos': hd.antpos, 'last': hd.lsts, 'Nfreqs': hd.Nfreqs,
                  'Ntimes': hd.Ntimes}
            with open(out_md, 'wb') as f:
                pickle.dump(md, f, protocol=pickle.HIGHEST_PROTOCOL)
            print('Relative calibration metadata pickled to {}\n'
                  .format(out_md))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
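# Hypothetical invocation sketch (not part of the original script): the JD,
# script name, and channel/time ranges are illustrative, and '~' is assumed
# to be the range separator understood by mod_str_arg.
import sys
sys.argv = ['rel_cal.py', '2458098.43869', '-p', 'ee', '-d', 'cauchy',
            '-c', '600~700', '-t', '0~10']
main()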
def XDgroup_data(JD_time, JDs, pol, chans=None, tints=None, bad_ants=True,
                 use_flags='first', noise=False, use_cal=None, verbose=False):
    """Returns redundant baseline grouping and reformatted dataset, with
    external flags applied, if specified

    :param JD_time: Julian time of 1st dataset, which sets times for others
    :type JD_time: str
    :param JDs: Julian days of data
    :type JDs: list, ndarray
    :param pol: Polarization of data
    :type pol: str
    :param chans: Frequency channel(s) {0, 1023} (None to choose all)
    :type chans: array-like, int, or None
    :param tints: Time integrations {0, 59} (None to choose all)
    :type tints: array-like, int, or None
    :param bad_ants: Flag known bad antennas, optional
    :type bad_ants: bool
    :param use_flags: Use flags to mask data
    :type use_flags: str
    :param noise: Also calculate noise from autocorrelations
    :type noise: bool
    :param use_cal: calfits file extension to use to calibrate data
    :type use_cal: str, None
    :param verbose: Print data gathering steps for each dataset
    :type verbose: bool

    :return hd: HERAData class
    :rtype hd: HERAData class
    :return redg: Grouped baselines, as returned by groupBls
    :rtype redg: ndarray
    :return cdata: Grouped visibilities with flags in numpy MaskedArray
    format, with format consistent with redg and dimensions
    (freq chans, time integrations, baselines)
    :rtype cdata: MaskedArray
    """
    if isinstance(chans, int):
        chans = np.asarray([chans])
    if isinstance(tints, int):
        tints = np.asarray([tints])

    zen_fn = find_zen_file(JD_time)
    flags_fn = find_flag_file(JD_time, use_flags)

    hd = HERAData(zen_fn)
    if tints is None:
        tints = np.arange(hd.Ntimes)

    if bad_ants:
        bad_ants = union_bad_ants(JDs)
    else:
        bad_ants = None

    if use_cal is None:
        cal_path = None
    else:
        cal_path = find_flag_file(JD_time, use_cal)

    if not verbose:
        grp_data = suppressOutput(group_data)
    else:
        grp_data = group_data

    grp = grp_data(zen_fn, pol, chans=chans, tints=tints, bad_ants=bad_ants,
                   flag_path=flags_fn, noise=noise, cal_path=cal_path)
    _, redg, cMData = grp[:3]

    cMData = cMData[np.newaxis, :]
    if noise:
        cNoise = grp[3]
        cNoise = cNoise[np.newaxis, :]

    JD_day = int(float(JD_time))
    if JD_day in JDs:
        JDs = list(JDs)
        JDs.remove(JD_day)

    for jd_i in JDs:
        JD_time_ia = match_lst(JD_time, jd_i)
        # aligning datasets in LAST
        last_df = pd.read_pickle(
            os.path.join(os.path.dirname(__file__), 'jd_lst_map_idr2.pkl'))
        last1 = last_df[last_df['JD_time'] ==
                        float(JD_time)]['LASTs'].values[0]
        last2 = last_df[last_df['JD_time'] ==
                        float(JD_time_ia)]['LASTs'].values[0]
        _, offset = find_nearest(last2, last1[0])

        # wrap time integrations (each dataset has 60 of them; see docstring)
        tints_i = (tints + offset) % 60
        scnd_dataset = all(tints + offset > hd.Ntimes - 1)
        single_dataset = all(tints + offset < hd.Ntimes - 1) or scnd_dataset

        if not single_dataset:
            tints_ia, tints_ib = np.split(tints_i, np.where(tints_i == 0)[0])
        else:
            tints_ia = tints_i

        if scnd_dataset:
            next_row = np.where(last_df['JD_time'] ==
                                float(JD_time_ia))[0][0] + 1
            JD_time_ib = last_df.iloc[next_row]['JD_time']
            JD_time_ia = JD_time_ib

        JD_time_ia = check_jdt(JD_time_ia)
        zen_fn_ia = find_zen_file(JD_time_ia)
        flags_fn_ia = find_flag_file(JD_time_ia, use_flags)
        if use_cal is not None:
            cal_path_ia = find_flag_file(JD_time_ia, use_cal)
        else:
            cal_path_ia = None

        grp_a = grp_data(zen_fn_ia, pol, chans=chans, tints=tints_ia,
                         bad_ants=bad_ants, flag_path=flags_fn_ia,
                         noise=noise, cal_path=cal_path_ia)
        cMData_ia = grp_a[2]

        if not single_dataset:
            next_row = np.where(last_df['JD_time'] ==
                                float(JD_time_ia))[0][0] + 1
            JD_time_ib = last_df.iloc[next_row]['JD_time']
            JD_time_ib = check_jdt(JD_time_ib)
            zen_fn_ib = find_zen_file(JD_time_ib)
            flags_fn_ib = find_flag_file(JD_time_ib, use_flags)
            if use_cal is not None:
                cal_path_ib = find_flag_file(JD_time_ib, use_cal)
            else:
                cal_path_ib = None

            grp_b = grp_data(zen_fn_ib, pol, chans=chans, tints=tints_ib,
                             bad_ants=bad_ants, flag_path=flags_fn_ib,
                             noise=noise, cal_path=cal_path_ib)
            cMData_ib = grp_b[2]

            cMData_i = np.ma.concatenate((cMData_ia, cMData_ib), axis=1)
        else:
            cMData_i = cMData_ia

        cMData_i = cMData_i[np.newaxis, :]
        cMData = np.ma.concatenate((cMData, cMData_i), axis=0)
        if noise:
            cNoise_ia = grp_a[3]
            if not single_dataset:
                cNoise_ib = grp_b[3]
                cNoise_i = np.concatenate((cNoise_ia, cNoise_ib), axis=1)
            else:
                cNoise_i = cNoise_ia
            cNoise_i = cNoise_i[np.newaxis, :]
            cNoise = np.concatenate((cNoise, cNoise_i), axis=0)

    if noise:
        return hd, redg, cMData, cNoise
    else:
        return hd, redg, cMData
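# Usage sketch (hypothetical): the JD_time and JD list are illustrative.
# With noise=False the function returns three values; with noise=True it
# also returns the grouped noise array.
hd, redg, cdata = XDgroup_data('2458098.43869', [2458098, 2458099], 'ee',
                               chans=np.arange(600, 700), tints=None,
                               bad_ants=True, use_flags='first', noise=False)
print(cdata.shape)  # (no. of JDs, freq chans, time ints, baselines)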
data_file = data_directory
model_file = model_directory

flag_files = [f"/lustre/aoc/projects/hera/aewallwi/H1C_flags/{jd}.flags.h5"
              for jd in [2458098, 2458099, 2458101, 2458102, 2458103, 2458104,
                         2458105, 2458106, 2458107, 2458108, 2458109, 2458110,
                         2458111, 2458112, 2458113, 2458114, 2458115, 2458116]]
bad_ants = [np.loadtxt(f"/users/kshahin/kshahin/HERA_Calibration/hera_pipelines/pipelines/h1c/idr2/v2/bad_ants/{ba}.txt")
            for ba in [2458098, 2458099, 2458101, 2458102, 2458103, 2458104,
                       2458105, 2458106, 2458107, 2458108, 2458109, 2458110,
                       2458111, 2458112, 2458113, 2458114, 2458115, 2458116]]
flag_file = flag_files[day]
bad_ant = bad_ants[day]

if not os.path.exists(f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}"):
    os.mkdir(f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}")

# select the 115-175 MHz band and a 60-integration chunk of the flags
flags = UVFlag(flag_file)
flags.select(frequencies=flags.freq_array[(flags.freq_array >= 115e6)
                                          & (flags.freq_array < 175e6)])
flags.select(times=flags.time_array[2600:2660])

hd_data = HERAData(data_file)
freqs = hd_data.freqs[(hd_data.freqs >= 115e6) & (hd_data.freqs < 175e6)]
data, flag, nsample = hd_data.read(polarizations=["ee"], frequencies=freqs)

for bl in data:
    if (bl[0] == bad_ant).any() or (bl[1] == bad_ant).any():
        flag[bl] = np.ones_like(flag[bl])
    # combine with the external flags; a logical OR keeps the bad-antenna
    # flags set
    flag[bl] = flag[bl] | flags.flag_array.squeeze()

hd_data.update(flags=flag)
hd_data.write_uvh5(f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_{chunk}.uvh5",
                   clobber=True)
del data, flag, nsample, hd_data

redcal.redcal_run(input_data=f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_{chunk}.uvh5",
                  clobber=True, solar_horizon=90, verbose=True)
abscal.post_redcal_abscal_run(
    data_file=f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_{chunk}.uvh5",
    redcal_file=f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_{chunk}.omni.calfits",
    model_files=[model_file], clobber=True, data_solar_horizon=90,
    model_solar_horizon=90)
cs = smooth_cal.CalibrationSmoother(
    calfits_list=sorted(glob.glob(f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_*.abs.calfits")))
cs.time_freq_2D_filter(time_scale=21600)
cs.write_smoothed_cal(clobber=True, output_replace=(".abs.", ".smooth_abs."))
def ant_metrics_run(files, pols=['xx', 'yy', 'xy', 'yx'], crossCut=5.0,
                    deadCut=5.0, alwaysDeadCut=10.0, metrics_path='',
                    extension='.ant_metrics.hdf5', vis_format='miriad',
                    verbose=True, history='', run_mean_vij=True,
                    run_red_corr=True, run_cross_pols=True):
    """
    Run a series of ant_metrics tests on a given set of input files.

    Args:
        files: List of files to run ant metrics on.
               Can be any of the 4 polarizations.
        pols: List of polarizations to perform metrics over.
              Allowed polarizations: 'xx', 'yy', 'xy', 'yx'
              Default: ['xx', 'yy', 'xy', 'yx']
        crossCut: Modified Z-Score limit to cut cross-polarized antennas.
                  Default: 5.0
        deadCut: Modified Z-Score limit to cut dead antennas.
                 Default: 5.0
        alwaysDeadCut: Modified Z-Score limit for antennas that are definitely
                       dead. Antennas above this limit are thrown away before
                       iterative flagging.
                       Default: 10.0
        metrics_path: String path to directory to store output metrics.
                      Default: same directory as input data files.
        extension: File extension to add to output files.
                   Default: ant_metrics.hdf5
        vis_format: File format of input visibility data.
                    Supports: 'miriad', 'uvfits', 'fhd', 'ms'
                    (see pyuvdata docs)
                    Default: 'miriad'
        verbose: If True, print out statements during iterative flagging.
        history: The history to add to metrics.
                 Default: ''
        run_mean_vij: Boolean flag which determines if mean_Vij_metrics
                      is executed. Default is True.
        run_red_corr: Boolean flag which determines if red_corr_metrics
                      is executed. Default is True.
        run_cross_pols: Boolean flag which determines if
                        mean_Vij_cross_pol_metrics and
                        red_corr_cross_pol_metrics are executed.
                        Default is True.

    Return:
        None

    The function will take in a list of files and options. It will run the
    series of ant metrics tests, and produce an HDF5 file containing the
    relevant information. The file list need only contain one polarization
    type for a given JD; the function will look for the other polarizations
    in the same folder. If not all four polarizations are found, a warning
    is generated, since the code assumes all four polarizations are present.
    """
    from hera_cal.omni import aa_to_info
    from hera_cal.utils import get_aa_from_uv
    from hera_cal.io import HERAData

    # check the user asked to run anything
    if not any([run_mean_vij, run_red_corr, run_cross_pols]):
        raise AssertionError("No Ant Metrics have been selected to run. "
                             "Please set the correct keywords to run "
                             "the desired metrics.")

    # check that we were given some files to process
    if len(files) == 0:
        raise AssertionError('Please provide a list of visibility files')

    # generate a list of all files to be read in
    fullpol_file_list = utils.generate_fullpol_file_list(files, pols)
    if len(fullpol_file_list) == 0:
        raise AssertionError('Could not find all 4 polarizations '
                             'for any files provided')

    # generate aa object from file
    # N.B.: assumes redundancy information is the same for all files passed in
    first_file = fullpol_file_list[0][0]
    hd = HERAData(first_file, filetype='miriad')
    data, flags, nsamples = hd.read()
    aa = get_aa_from_uv(hd)
    del hd
    info = aa_to_info(aa, pols=[pols[-1][0]])
    reds = info.get_reds()

    # do the work
    for jd_list in fullpol_file_list:
        am = AntennaMetrics(jd_list, reds, fileformat=vis_format)
        am.iterative_antenna_metrics_and_flagging(
            crossCut=crossCut, deadCut=deadCut, alwaysDeadCut=alwaysDeadCut,
            verbose=verbose, run_mean_vij=run_mean_vij,
            run_red_corr=run_red_corr, run_cross_pols=run_cross_pols)

        # add history
        am.history = am.history + history

        base_filename = jd_list[0]
        abspath = os.path.abspath(base_filename)
        dirname = os.path.dirname(abspath)
        basename = os.path.basename(base_filename)
        nopol_filename = re.sub(r'\.{}\.'.format(pols[0]), '.', basename)
        if metrics_path == '':
            # default path is same directory as file
            metrics_path = dirname
        metrics_basename = nopol_filename + extension
        metrics_filename = os.path.join(metrics_path, metrics_basename)
        am.save_antenna_metrics(metrics_filename)

    return
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Across days relative redundant calibration of visibilities

    Takes HERA visibility datasets across several JDs in uvh5 file format,
    aligns them in LAST and then performs relative redundant calibration
    (up to the overall amplitude, overall phase, and phase gradient
    degenerate parameters) for each frequency channel and each time
    integration in the dataset.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the relative redundant calibration for each set of frequency channel and
    time integration.
    """))
    parser.add_argument('jd_time', help='Fractional JD time of dataset to '
                        'align other dataframes to', metavar='JD', type=str)
    parser.add_argument('-j', '--jds', required=True, metavar='J', type=str,
                        help='JDs to calibrate')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None,
                        metavar='C', type=str,
                        help='Frequency channels to calibrate {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None,
                        metavar='T', type=str,
                        help='Time integrations to calibrate {0, 59}')
    parser.add_argument('-f', '--flag_type', required=False, default='first',
                        metavar='F', type=str,
                        help='Flag type e.g. "first", "omni", "abs"')
    parser.add_argument('-d', '--dist', required=True, metavar='D', type=str,
                        help='Fitting distribution for calibration '
                        '{"cauchy", "gaussian"}')
    parser.add_argument('-v', '--noise', required=False, action='store_true',
                        help='Use noise from autos in nlogL calculations')
    parser.add_argument('-cf', '--chan_flag_pct', required=False,
                        default=None, metavar='CFP', type=float,
                        help='Flag channel if more than X%% of day/time '
                        'slices for a given channel are flagged')
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-u', '--out_dir', required=False, default=None,
                        metavar='U', type=str,
                        help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new dataframe')
    parser.add_argument('-k', '--compression', required=False, default=None,
                        metavar='K', type=str,
                        help='Compression to use when pickling results '
                        'dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    zen_fn = find_zen_file(args.jd_time)
    hd = HERAData(zen_fn)

    out_fn = args.out
    default_fn = 'xd_rel_df.{}.{}.{}'.format('{:.4f}'.format(hd.lsts[0]),
                                             args.pol, args.dist)
    if out_fn is None:
        out_fn = default_fn
    if args.out_dir is not None:
        if not os.path.exists(args.out_dir):
            os.mkdir(args.out_dir)
        out_fn = os.path.join(args.out_dir, out_fn)
        if out_fn is not None:
            default_fn = os.path.join(args.out_dir, default_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if csv_exists or pkl_exists:
        if args.new_df:
            out_csv = new_fn(out_csv, None, startTime)
            out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
            csv_exists = False
            pkl_exists = False

    JDs = args.jds
    if JDs == 'idr2_jds':
        JDs = numpy.asarray(idr2_jds)
    elif JDs == 'idr2_jdsx':
        JDs = numpy.asarray(idr2_jdsx)
    else:
        if '_' in JDs:
            JDs = numpy.asarray(JDs.split('_'), dtype=int)
        else:
            JDs = mod_str_arg(JDs)
        JDs = numpy.intersect1d(JDs, idr2_jds)

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(hd.Nfreqs - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(hd.Ntimes - 1)
    print('Running relative redundant calibration across JDs {} between '
          'LASTS {:.4f} and {:.4f} for polarization {}, frequency channel(s) '
          '{} and time integration(s) {}, with {} assumed noise '
          'distribution.\n'.format(' '.join(map(str, JDs)), hd.lsts[0],
                                   hd.lsts[-1], args.pol, pchans, ptints,
                                   args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(hd.Nfreqs)
    if time_ints is None:
        time_ints = numpy.arange(hd.Ntimes)

    indices = ['freq', 'time_int']

    no_tints = len(time_ints)
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    skip_cal = False
    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        cmap_f = dict(map(reversed, enumerate(freq_chans)))
        cmap_t = dict(map(reversed, enumerate(time_ints)))
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.index.values
        done = [(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr
                if (f in freq_chans and t in time_ints)]
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not any(iter_dims):
            print('Solutions to all specified frequency channels and time '
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            grp = XDgroup_data(args.jd_time, JDs, args.pol, chans=freq_chans,
                               tints=time_ints, use_flags=args.flag_type,
                               noise=args.noise)
        if not args.noise:
            _, RedG, cData = grp
            noisec = None
        else:
            _, RedG, cData, cNData = grp

        flags = cData.mask
        cData = cData.data

        # to get fields for the csv header
        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants * JDs.size + no_unq_bls) * 2

        # discarding 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        # remove flagged channels from iter_dims
        if isinstance(flags, numpy.bool_):  # if all flags are the same
            flags = [flags]
        if True in flags:
            if args.chan_flag_pct is None:
                flg_chans = numpy.unique(
                    numpy.where(flags.all(axis=(0, 2, 3)))[0])
                print('Flagged channels across all days are: {}\n'
                      .format(freq_chans[flg_chans]))
            else:
                flg_pct = args.chan_flag_pct / 100
                flg_chans = numpy.unique(numpy.where(
                    flags.all(axis=3).mean(axis=(0, 2)) > flg_pct)[0])
                print('Flagged channels across all days and those that are '
                      'more than {}% flagged for their given day/time slice '
                      'are: {}\n'.format(args.chan_flag_pct,
                                         freq_chans[flg_chans]))
            iter_dims = [idim for idim in iter_dims
                         if idim[0] not in flg_chans]
            if not iter_dims:  # check if slices to solve are empty
                print('All specified channels are flagged. Exiting.')
                sys.exit()

        def cal(credg, distribution, no_unq_bls, no_ants, obsvis, noise,
                initp):
            """Relative redundant calibration across days with doRelCalD:
            default implementation with unconstrained minimizer using
            cartesian coordinates
            """
            res_rel, initp_new = doRelCalD(
                credg, obsvis, no_unq_bls, no_ants,
                distribution=distribution, noise=noise, initp=initp,
                return_initp=True, xd=True)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_new
            return res_rel, initp

        RelCal = functools.partial(cal, cRedG, args.dist, no_unq_bls, no_ants)

        with redirect_stdout(stdout):  # suppress output
            with open(out_csv, 'a') as f:  # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                for i, iter_dim in enumerate(iter_dims):
                    if args.noise:
                        noisec = cNData[:, iter_dim[0], iter_dim[1], :]
                    res_rel, initp = RelCal(
                        cData[:, iter_dim[0], iter_dim[1], :], noisec, initp)
                    # expanding out the solution
                    for j, param in enumerate(res_rel['x']):
                        res_rel[j] = param
                    # reset initp after each frequency slice
                    if not (i + 1) % no_tints:
                        initp = None
                    del res_rel['x']
                    res_rel.update({indices[0]: freq_chans[iter_dim[0]],
                                    indices[1]: time_ints[iter_dim[1]]})
                    writer.writerow(res_rel)

        print('Relative calibration results saved to csv file {}'
              .format(out_csv))
        df = pd.read_csv(out_csv)
        if csv_exists:
            freqs = df['freq'].unique()
            tints = df['time_int'].unique()
            if cData.shape[0] != freqs.size or cData.shape[1] != tints.size:
                _, _, cData = XDgroup_data(args.jd_time, JDs, args.pol,
                                           chans=freqs, tints=tints,
                                           use_flags=args.flag_type,
                                           noise=None)
                cData = cData.data
        df.set_index(indices, inplace=True)
        # we now append the residuals as additional columns
        df = append_residuals_rel(df, cData, cRedG, 'cartesian', out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'
                  .format(args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Relative calibration results dataframe pickled to {}'
              .format(out_pkl))

        # creating metadata file
        out_md = default_fn.rsplit('.', 1)[0] + '.md.pkl'
        if not os.path.exists(out_md):
            md = {'no_ants': no_ants, 'no_unq_bls': no_unq_bls, 'redg': RedG,
                  'antpos': hd.antpos, 'last': hd.lsts, 'Nfreqs': hd.Nfreqs,
                  'Ntimes': hd.Ntimes, 'JDs': JDs}
            with open(out_md, 'wb') as f:
                pickle.dump(md, f, protocol=pickle.HIGHEST_PROTOCOL)
            print('Relative calibration metadata pickled to {}\n'
                  .format(out_md))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
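# Hypothetical invocation sketch (not part of the original script): the JD,
# script name, and channel range are illustrative; per the parsing above,
# '-j' accepts 'idr2_jds', 'idr2_jdsx', or JDs joined with '_'.
import sys
sys.argv = ['xd_rel_cal.py', '2458098.43869', '-j', '2458098_2458099',
            '-p', 'ee', '-d', 'cauchy', '-c', '600~700']
main()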