Example #1
    def __init__(self, dataFileList, reds, fileformat='miriad'):
        """Initilize an AntennaMetrics object.

        Arguments:
            dataFileList: List of data filenames of the four different visibility
                polarizations for the same observation.
            reds: List of lists of tuples of antenna numbers that make up
                redundant baseline groups.
            fileformat: File type of data.
                    Supports: 'miriad' and 'uvfits' ('fhd' raises
                    NotImplementedError; see pyuvdata docs for file types).
                    Default: 'miriad'.
        """
        from hera_cal.io import HERAData

        if fileformat == 'miriad':
            self.hd = HERAData(dataFileList, filetype='miriad')
        elif fileformat == 'uvfits':
            self.hd = HERAData(dataFileList, filetype='uvfits')
        elif fileformat == 'fhd':
            raise NotImplementedError(str(fileformat) + ' is not supported')
        else:
            raise ValueError('Unrecognized file format ' + str(fileformat))

        self.data, self.flags, self.nsamples = self.hd.read()
        self.ants = self.hd.get_ants()
        self.pols = [pol.lower() for pol in self.hd.get_pols()]
        self.antpols = [antpol.lower() for antpol in self.hd.get_feedpols()]
        self.bls = self.hd.get_antpairs()
        self.dataFileList = dataFileList
        self.reds = reds
        self.version_str = hera_qm_version_str
        self.history = ''

        if len(self.antpols) != 2 or len(self.pols) != 4:
            raise ValueError('Missing polarization information. pols = '
                             + str(self.pols) + ' and antpols = '
                             + str(self.antpols))
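
A minimal construction sketch (file names hypothetical; reds_from_file is the helper shown in Example #2, assumed importable from hera_qm.ant_metrics):

# Hypothetical usage: build an AntennaMetrics object from the four
# single-polarization miriad files of one observation.
dataFileList = ['zen.2457698.40355.{}.HH.uvcA'.format(p)
                for p in ('xx', 'yy', 'xy', 'yx')]
reds = reds_from_file(dataFileList[0], vis_format='miriad')
am = AntennaMetrics(dataFileList, reds, fileformat='miriad')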
Example #2
def reds_from_file(filename, vis_format='miriad'):
    """Get the redundant baseline pairs from a file.

    This is a wrapper around hera_cal.redcal.get_pos_reds that avoids reading
    the full data file when the antenna positions are available from metadata alone.

    Parameters
    ----------
    filename : str
        The file to get reds from.
    vis_format : {'miriad', 'uvh5', 'uvfits', 'fhd', 'ms'}, optional
        Format of the data file. Default is 'miriad'.

    Returns
    -------
    reds : list of lists of tuples
        Each tuple represents antenna pairs. These are compiled in a list within
        a redundant group, and the outer list is all the redundant groups.
        See hera_cal.redcal.get_pos_reds.

    """
    from hera_cal.io import HERAData
    from hera_cal.redcal import get_pos_reds

    hd = HERAData(filename, filetype=vis_format)
    if hd.antpos is None:
        reds = get_pos_reds(hd.read()[0].antpos)
    else:
        reds = get_pos_reds(hd.antpos)
    del hd
    return reds
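
A short usage sketch (the uvh5 path is hypothetical); uvh5 files expose antenna positions in their metadata, so this hits the metadata-only branch:

# Hypothetical usage: antenna positions come from metadata alone,
# so no visibility data is loaded.
reds = reds_from_file('zen.2458098.12345.uvh5', vis_format='uvh5')
print(len(reds), 'redundant groups; first group starts with:', reds[0][:3])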
Example #3
    def __init__(self, dataFileList, reds, fileformat='miriad'):
        """Initilize an AntennaMetrics object.

        Parameters
        ----------
        dataFileList : list of str
            List of data filenames of the four different visibility polarizations
            for the same observation.
        reds : list of lists of tuples of ints
            List of lists of tuples of antenna numbers that make up redundant baseline groups.
        fileformat : str, optional
            File type of data. Must be one of: 'miriad', 'uvh5', 'uvfits', 'fhd',
            'ms' (see pyuvdata docs). Default is 'miriad'.

        Attributes
        ----------
        hd : HERAData
            HERAData object generated from dataFileList.
        data : array
            Data contained in HERAData object.
        flags : array
            Flags contained in HERAData object.
        nsamples : array
            Nsamples contained in HERAData object.
        ants : list of ints
            List of antennas in HERAData object.
        pols : list of str
            List of polarizations in HERAData object.
        bls : list of tuples of ints
            List of baselines (antenna pairs) in HERAData object.
        dataFileList : list of str
            List of data filenames of the four different visibility polarizations
            for the same observation.
        reds : list of lists of tuples of ints
            List of lists of tuples of antenna numbers that make up redundant baseline groups.
        version_str : str
            The version of the hera_qm module used to generate these metrics.
        history : str
            History to append to the metrics files when writing out files.

        """
        from hera_cal.io import HERAData

        self.hd = HERAData(dataFileList, filetype=fileformat)

        self.data, self.flags, self.nsamples = self.hd.read()
        self.ants = self.hd.get_ants()
        self.pols = [pol.lower() for pol in self.hd.get_pols()]
        self.antpols = [antpol.lower() for antpol in self.hd.get_feedpols()]
        self.bls = self.hd.get_antpairs()
        self.dataFileList = dataFileList
        self.reds = reds
        self.version_str = hera_qm_version_str
        self.history = ''

        if len(self.antpols) != 2 or len(self.pols) != 4:
            raise ValueError('Missing polarization information. pols = '
                             + str(self.pols) + ' and antpols = '
                             + str(self.antpols))
Example #4
def test_init_HERAData():
    uv = UVData()
    uv.read_miriad(test_d_file)
    uvf1 = UVFlag(uv)
    hd = HERAData(test_d_file, filetype='miriad')
    hd.read()
    uvf2 = UVFlag(hd)
    nt.assert_equal(uvf1, uvf2)
Example #5
def test_reds_from_file_read_file():
    from hera_cal.io import HERAData
    from hera_cal.redcal import get_pos_reds

    # Miriad file will need to be read in
    testfile = os.path.join(DATA_PATH, 'zen.2457698.40355.xx.HH.uvcAA')
    reds = ant_metrics.reds_from_file(testfile, vis_format='miriad')
    assert len(reds) > 1
    hd = HERAData(testfile, filetype='miriad')
    reds_check = get_pos_reds(hd.read()[0].antpos)
    assert reds == reds_check
Example #6
def calfits_to_flags(JD_time, cal_type, pol='ee', add_bad_ants=None):
    """Returns flags array from calfits file

    :param JD_time: Fractional Julian date
    :type JD_time: float, str
    :param cal_type: Calibration process that produced the calfits file {"first",
    "omni", "abs", "flagged_abs", "smooth_abs"}
    :type cal_type: str
    :param pol: Polarization of data
    :type pol: str
    :param add_bad_ants: Additional bad antennas
    :type add_bad_ants: None, int, list, ndarray

    :return: Flags array
    :rtype: ndarray
    """

    zen_fn = find_zen_file(JD_time)
    flags_fn = find_flag_file(JD_time, cal_type)
    bad_ants = get_bad_ants(zen_fn)
    if add_bad_ants is not None:
        bad_ants = numpy.sort(numpy.append(bad_ants,
                                           numpy.array(add_bad_ants)))

    hc = HERACal(flags_fn)
    _, cal_flags, _, _ = hc.read()

    hd = HERAData(zen_fn)
    reds = get_reds(hd.antpos, pols=[pol])
    reds = fltBad(reds, bad_ants)
    redg = groupBls(reds)

    antpairs = redg[:, 1:]
    cflag = numpy.empty((hd.Nfreqs, hd.Ntimes, redg.shape[0]), dtype=bool)
    for g in range(redg.shape[0]):
        # flag the group member if either of its antennas is flagged
        cflag[:, :, g] = (cal_flags[(int(antpairs[g, 0]), 'J{}'.format(pol))]
                          | cal_flags[(int(antpairs[g, 1]), 'J{}'.format(pol))]).transpose()

    return cflag
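
A usage sketch under the same helper-function assumptions (find_zen_file, find_flag_file, etc.); the JD and antenna number are hypothetical:

# Hypothetical usage: flags from the smoothed absolute calibration of one
# dataset, with antenna 86 added to the bad-antenna list by hand.
cflags = calfits_to_flags('2458098.43869', 'smooth_abs', pol='ee',
                          add_bad_ants=86)
print(cflags.shape)  # (Nfreqs, Ntimes, number of rows in redg)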
Example #7
class AntennaMetrics():
    """Container for holding data and meta-data for ant metrics calculations.

    This class creates an object for holding relevant visibility data and metadata,
    and provides interfaces to four antenna metrics: two identify dead antennas,
    and two identify cross-polarized antennas. These metrics can be used iteratively
    to identify bad antennas. The object handles all storage of metrics, and supports
    writing metrics to an HDF5 filetype. The analysis functions are designed to work
    on raw data from a single observation with all four polarizations.

    """

    def __init__(self, dataFileList, reds, fileformat='miriad'):
        """Initilize an AntennaMetrics object.

        Parameters
        ----------
        dataFileList : list of str
            List of data filenames of the four different visibility polarizations
            for the same observation.
        reds : list of lists of tuples of ints
            List of lists of tuples of antenna numbers that make up redundant baseline groups.
        fileformat : str, optional
            File type of data. Must be one of: 'miriad', 'uvh5', 'uvfits', 'fhd',
            'ms' (see pyuvdata docs). Default is 'miriad'.

        Attributes
        ----------
        hd : HERAData
            HERAData object generated from dataFileList.
        data : array
            Data contained in HERAData object.
        flags : array
            Flags contained in HERAData object.
        nsamples : array
            Nsamples contained in HERAData object.
        ants : list of ints
            List of antennas in HERAData object.
        pols : list of str
            List of polarizations in HERAData object.
        bls : list of tuples of ints
            List of baselines (antenna pairs) in HERAData object.
        dataFileList : list of str
            List of data filenames of the four different visibility polarizations
            for the same observation.
        reds : list of lists of tuples of ints
            List of lists of tuples of antenna numbers that make up redundant baseline groups.
        version_str : str
            The version of the hera_qm module used to generate these metrics.
        history : str
            History to append to the metrics files when writing out files.

        """
        from hera_cal.io import HERAData

        self.hd = HERAData(dataFileList, filetype=fileformat)

        self.data, self.flags, self.nsamples = self.hd.read()
        self.ants = self.hd.get_ants()
        self.pols = [pol.lower() for pol in self.hd.get_pols()]
        self.antpols = [antpol.lower() for antpol in self.hd.get_feedpols()]
        self.bls = self.hd.get_antpairs()
        self.dataFileList = dataFileList
        self.reds = reds
        self.version_str = hera_qm_version_str
        self.history = ''

        if len(self.antpols) != 2 or len(self.pols) != 4:
            raise ValueError('Missing polarization information. pols = '
                             + str(self.pols) + ' and antpols = '
                             + str(self.antpols))

    def mean_Vij_metrics(self, pols=None, xants=[], rawMetric=False):
        """Calculate how an antennas's average |Vij| deviates from others.

        Local wrapper for mean_Vij_metrics in the hera_qm.ant_metrics module.

        Parameters
        ----------
        pols : list of str, optional
            List of visibility polarizations (e.g. ['xx','xy','yx','yy']).
            Default is self.pols.
        xants : list of tuples, optional
            List of antenna-polarization tuples that should be ignored. The
            expected format is (ant, antpol). Default is empty list.
        rawMetric : bool, optional
            If True, return the raw mean Vij metric instead of the modified z-score.
            Default is False.

        Returns
        -------
        meanMetrics : dict
            Dictionary indexed by (ant, antpol) keys. Contains the modified z-score
            of the mean of the absolute value of all visibilities associated with
            an antenna. Very small or very large numbers are probably bad antennas.

        """
        if pols is None:
            pols = self.pols
        return mean_Vij_metrics(self.data, pols, self.antpols,
                                self.ants, self.bls, xants=xants,
                                rawMetric=rawMetric)

    def red_corr_metrics(self, pols=None, xants=[], rawMetric=False,
                         crossPol=False):
        """Calculate modified Z-Score over all redundant groups for each antenna.

        This method is a local wrapper for red_corr_metrics. It calculates the extent
        to which baselines involving an antenna do not correlate with others they are
        nominally redundant with.

        Parameters
        ----------
        pols : list of str, optional
            List of visibility polarizations (e.g. ['xx','xy','yx','yy']).
            Default is self.pols.
        xants : list of tuples, optional
            List of antenna-polarization tuples that should be ignored. The
            expected format is (ant, antpol). Default is empty list.
        rawMetric : bool, optional
            If True, return the raw power correlations instead of the modified z-score.
            Default is False.
        crossPol : bool, optional
            If True, return results only when the two visibility polarizations differ
            by a single flip. Default is False.

        Returns
        -------
        powerRedMetric : dict
            Dictionary indexed by (ant,antpol) keys. Contains the modified z-scores
            of the mean power correlations inside redundant baseline groups associated
            with each antenna. Very small numbers are probably bad antennas.

        """
        if pols is None:
            pols = self.pols
        return red_corr_metrics(self.data, pols, self.antpols,
                                self.ants, self.reds, xants=xants,
                                rawMetric=rawMetric, crossPol=crossPol)

    def mean_Vij_cross_pol_metrics(self, xants=[], rawMetric=False):
        """Calculate the ratio of cross-pol visibilities to same-pol visibilities.

        This method is a local wrapper for mean_Vij_cross_pol_metrics. It finds
        which antennas are outliers based on the ratio of mean cross-pol visibilities
        to mean same-pol visibilities:
            (Vxy+Vyx)/(Vxx+Vyy).

        Parameters
        ----------
        xants : list of tuples, optional
            List of antenna-polarization tuples that should be ignored. The
            expected format is (ant, antpol). Default is empty list.
        rawMetric : bool, optional
            If True, return the raw power correlations instead of the modified z-score.
            Default is False.

        Returns
        -------
        mean_Vij_cross_pol_metrics : dict
            Dictionary indexed by (ant,antpol) keys. Contains the modified z-scores of the
            ratio of mean visibilities, (Vxy+Vyx)/(Vxx+Vyy). Results are duplicated in
            both antpols. Very large values are likely cross-polarized.

        """
        return mean_Vij_cross_pol_metrics(self.data, self.pols,
                                          self.antpols, self.ants,
                                          self.bls, xants=xants,
                                          rawMetric=rawMetric)

    def red_corr_cross_pol_metrics(self, xants=[], rawMetric=False):
        """Calculate modified Z-Score over redundant groups; assume cross-polarized.

        This method is a local wrapper for red_corr_cross_pol_metrics. It finds
        which antennas are part of visibilities that are significantly better
        correlated with polarization-flipped visibilities in a redundant group.
        It returns the modified z-score.

        Parameters
        ----------
        xants : list of tuples, optional
            List of antenna-polarization tuples that should be ignored. The
            expected format is (ant, antpol). Default is empty list.
        rawMetric : bool, optional
            If True, return the raw power correlations instead of the modified z-score.
            Default is False.

        Returns
        -------
        redCorrCrossPolMetrics : dict
            Dictionary indexed by (ant,antpol) keys. Contains the modified z-scores
            of the mean correlation ratio between redundant visibilities and singly
            polarization-flipped ones. Very large values are probably cross-polarized.

        """
        return red_corr_cross_pol_metrics(self.data, self.pols,
                                          self.antpols, self.ants,
                                          self.reds, xants=xants,
                                          rawMetric=rawMetric)

    def reset_summary_stats(self):
        """Reset all the internal summary statistics back to empty."""
        self.xants, self.crossedAntsRemoved, self.deadAntsRemoved = [], [], []
        self.iter = 0
        self.removalIter = {}
        self.allMetrics, self.allModzScores = OrderedDict(), OrderedDict()
        self.finalMetrics, self.finalModzScores = {}, {}

    def find_totally_dead_ants(self):
        """Flag antennas whose median autoPower is 0.0.

        These antennas are marked as dead. They do not appear in recorded antenna
        metrics or zscores. Their removal iteration is -1 (i.e. before iterative
        flagging).
        """
        autoPowers = compute_median_auto_power_dict(self.data,
                                                    self.pols,
                                                    self.reds)
        power_list_by_ant = {(ant, antpol): []
                             for ant in self.ants
                             for antpol in self.antpols
                             if (ant, antpol) not in self.xants}
        for ((ant0, ant1, pol), power) in autoPowers.items():
            if ((ant0, pol[0]) not in self.xants
                    and (ant1, pol[1]) not in self.xants):
                power_list_by_ant[(ant0, pol[0])].append(power)
                power_list_by_ant[(ant1, pol[1])].append(power)
        for (key, val) in power_list_by_ant.items():
            if np.median(val) == 0:
                self.xants.append(key)
                self.deadAntsRemoved.append(key)
                self.removalIter[key] = -1

    def _run_all_metrics(self, run_mean_vij=True, run_red_corr=True,
                         run_cross_pols=True, run_cross_pols_only=False):
        """Local call for all metrics as part of iterative flagging method.

        Parameters
        ----------
        run_mean_vij : bool, optional
            Define if mean_Vij_metrics or mean_Vij_cross_pol_metrics are executed.
            Default is True.
        run_red_corr : bool, optional
            Define if red_corr_metrics or red_corr_cross_pol_metrics are executed.
            Default is True.
        run_cross_pols : bool, optional
            Define if mean_Vij_cross_pol_metrics and red_corr_cross_pol_metrics
            are executed. Default is True. Individual rules are inherited from
            run_mean_vij and run_red_corr.
        run_cross_pols_only : bool, optional
            Define if cross pol metrics are the *only* metrics to be run.
            Default is False.

        """
        # Compute all raw metrics
        metNames = []
        metVals = []

        if run_mean_vij and not run_cross_pols_only:
            metNames.append('meanVij')
            meanVij = self.mean_Vij_metrics(pols=self.pols,
                                            xants=self.xants,
                                            rawMetric=True)
            metVals.append(meanVij)

        if run_red_corr and not run_cross_pols_only:
            metNames.append('redCorr')
            pols = [pol for pol in self.pols if pol[0] == pol[1]]
            redCorr = self.red_corr_metrics(pols=pols,
                                            xants=self.xants,
                                            rawMetric=True)
            metVals.append(redCorr)

        if run_cross_pols:
            if run_mean_vij:
                metNames.append('meanVijXPol')
                meanVijXPol = self.mean_Vij_cross_pol_metrics(xants=self.xants,
                                                              rawMetric=True)
                metVals.append(meanVijXPol)
            if run_red_corr:
                metNames.append('redCorrXPol')
                redCorrXPol = self.red_corr_cross_pol_metrics(xants=self.xants,
                                                              rawMetric=True)
                metVals.append(redCorrXPol)

        # Save all metrics and zscores
        metrics, modzScores = {}, {}
        for metric, metName in zip(metVals, metNames):
            metrics[metName] = metric
            modz = per_antenna_modified_z_scores(metric)
            modzScores[metName] = modz
            for key in metric:
                if metName in self.finalMetrics:
                    self.finalMetrics[metName][key] = metric[key]
                    self.finalModzScores[metName][key] = modz[key]
                else:
                    self.finalMetrics[metName] = {key: metric[key]}
                    self.finalModzScores[metName] = {key: modz[key]}
        self.allMetrics.update({self.iter: metrics})
        self.allModzScores.update({self.iter: modzScores})

    def iterative_antenna_metrics_and_flagging(self, crossCut=5, deadCut=5,
                                               alwaysDeadCut=10,
                                               verbose=False,
                                               run_mean_vij=True,
                                               run_red_corr=True,
                                               run_cross_pols=True,
                                               run_cross_pols_only=False):
        """Run all four antenna metrics and stores results in self.

        Runs all four metrics: two for dead antennas, two for cross-polarized antennas.
        Saves the results internally to this antenna metrics object.

        Parameters
        ----------
        crossCut : float, optional
            Modified z-score cut for most cross-polarized antennas. Default is 5 "sigmas".
        deadCut : float, optional
            Modified z-score cut for most likely dead antennas. Default is 5 "sigmas".
        alwaysDeadCut : float, optional
            Modified z-score cut for definitely dead antennas. Default is 10 "sigmas".
            These are all thrown away at once without waiting to iteratively throw away
            only the worst offender.
        run_mean_vij : bool, optional
            Define if mean_Vij_metrics or mean_Vij_cross_pol_metrics are executed.
            Default is True.
        run_red_corr : bool, optional
            Define if red_corr_metrics or red_corr_cross_pol_metrics are executed.
            Default is True.
        run_cross_pols : bool, optional
            Define if mean_Vij_cross_pol_metrics and red_corr_cross_pol_metrics
            are executed. Default is True. Individual rules are inherited from
            run_mean_vij and run_red_corr.
        run_cross_pols_only : bool, optional
            Define if cross pol metrics are the *only* metrics to be run. Default
            is False.

        """
        self.reset_summary_stats()
        self.find_totally_dead_ants()
        self.crossCut, self.deadCut = crossCut, deadCut
        self.alwaysDeadCut = alwaysDeadCut

        # Iterate, flagging at most one antenna (or one batch of definitely
        # dead antennas) per pass
        for iter in range(len(self.antpols) * len(self.ants)):
            self.iter = iter
            self._run_all_metrics(run_mean_vij=run_mean_vij,
                                  run_red_corr=run_red_corr,
                                  run_cross_pols=run_cross_pols,
                                  run_cross_pols_only=run_cross_pols_only)

            # Most likely dead antenna
            last_iter = list(self.allModzScores)[-1]
            worstDeadCutRatio = -1
            worstCrossCutRatio = -1

            if run_mean_vij and run_red_corr and not run_cross_pols_only:
                deadMetrics = average_abs_metrics(self.allModzScores[last_iter]['meanVij'],
                                                  self.allModzScores[last_iter]['redCorr'])
            else:
                if run_mean_vij and not run_cross_pols_only:
                    deadMetrics = self.allModzScores[last_iter]['meanVij'].copy()
                elif run_red_corr and not run_cross_pols_only:
                    deadMetrics = self.allModzScores[last_iter]['redCorr'].copy()
            try:
                worstDeadAnt = max(deadMetrics, key=deadMetrics.get)
                worstDeadCutRatio = np.abs(deadMetrics[worstDeadAnt]) / deadCut
            except NameError:
                # Dead metrics weren't run, but that's fine.
                pass

            if run_cross_pols:
                # Most likely cross-polarized antenna
                if run_mean_vij and run_red_corr:
                    crossMetrics = average_abs_metrics(self.allModzScores[last_iter]['meanVijXPol'],
                                                       self.allModzScores[last_iter]['redCorrXPol'])
                elif run_mean_vij:
                    crossMetrics = self.allModzScores[last_iter]['meanVijXPol'].copy()
                elif run_red_corr:
                    crossMetrics = self.allModzScores[last_iter]['redCorrXPol'].copy()
                worstCrossAnt = max(crossMetrics, key=crossMetrics.get)
                worstCrossCutRatio = (np.abs(crossMetrics[worstCrossAnt])
                                      / crossCut)

            # Find the single worst antenna, remove it, log it, and run again
            if (worstCrossCutRatio >= worstDeadCutRatio
                    and worstCrossCutRatio >= 1.0):
                for antpol in self.antpols:
                    self.xants.append((worstCrossAnt[0], antpol))
                    self.crossedAntsRemoved.append((worstCrossAnt[0], antpol))
                    self.removalIter[(worstCrossAnt[0], antpol)] = iter
                    if verbose:
                        print('On iteration', iter, 'we flag\t', end='')
                        print((worstCrossAnt[0], antpol))
            elif (worstDeadCutRatio > worstCrossCutRatio
                    and worstDeadCutRatio > 1.0):
                dead_ants = set([worstDeadAnt])
                for (ant, metric) in deadMetrics.items():
                    if metric > alwaysDeadCut:
                        dead_ants.add(ant)
                for dead_ant in dead_ants:
                    self.xants.append(dead_ant)
                    self.deadAntsRemoved.append(dead_ant)
                    self.removalIter[dead_ant] = iter
                    if verbose:
                        print('On iteration', iter, 'we flag', dead_ant)
            else:
                break

    def save_antenna_metrics(self, filename, overwrite=False):
        """Output all meta-metrics and cut decisions to HDF5 file.

        Saves all cut decisions and meta-metrics in an HDF5 that can be loaded
        back into a dictionary using hera_qm.ant_metrics.load_antenna_metrics()

        Parameters
        ----------
        filename : str
            The file into which metrics will be written.
        overwrite : bool, optional
            Whether to overwrite an existing file. Default is False.

        """
        if not hasattr(self, 'xants'):
            raise KeyError(('Must run AntennaMetrics.'
                            'iterative_antenna_metrics_and_flagging() first.'))

        out_dict = {'xants': self.xants}
        out_dict['crossed_ants'] = self.crossedAntsRemoved
        out_dict['dead_ants'] = self.deadAntsRemoved
        out_dict['final_metrics'] = self.finalMetrics
        out_dict['all_metrics'] = self.allMetrics
        out_dict['final_mod_z_scores'] = self.finalModzScores
        out_dict['all_mod_z_scores'] = self.allModzScores
        out_dict['removal_iteration'] = self.removalIter
        out_dict['cross_pol_z_cut'] = self.crossCut
        out_dict['dead_ant_z_cut'] = self.deadCut
        out_dict['always_dead_ant_z_cut'] = self.alwaysDeadCut
        out_dict['datafile_list'] = self.dataFileList
        out_dict['reds'] = self.reds

        metrics_io.write_metric_file(filename, out_dict, overwrite=overwrite)
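
A sketch of the intended end-to-end workflow (file names hypothetical; reds_from_file as in Example #2, cut thresholds are the documented defaults):

# Hypothetical run: build the object, iterate the four metrics, then
# persist the cut decisions to HDF5.
dataFileList = ['zen.2457698.40355.{}.HH.uvcA'.format(p)
                for p in ('xx', 'yy', 'xy', 'yx')]
reds = reds_from_file(dataFileList[0], vis_format='miriad')
am = AntennaMetrics(dataFileList, reds, fileformat='miriad')
am.iterative_antenna_metrics_and_flagging(crossCut=5, deadCut=5, verbose=True)
print('Flagged antennas:', am.xants)
am.save_antenna_metrics('zen.2457698.40355.HH.ant_metrics.hdf5',
                        overwrite=True)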
Example #8
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Relative redundant calibration of visibilities

    Takes a given HERA visibility dataset in uvh5 file format and performs
    relative redundant calibration (up to the overall amplitude, overall
    phase, and phase gradient degenerate parameters) for each frequency channel
    and each time integration in the dataset.

    Returns a pickled pandas dataframe of the Scipy optimization results for
    the relative redundant calibration for each set of frequency channel and
    time integration.
    """))
    parser.add_argument('jd_time', metavar='JD', type=str,
                        help='Fractional JD time of dataset to calibrate')
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None,
                        metavar='C', type=str,
                        help='Frequency channels to calibrate {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None,
                        metavar='T', type=str,
                        help='Time integrations to calibrate {0, 59}')
    parser.add_argument('-f', '--flag_type', required=False, default='first',
                        metavar='F', type=str,
                        help='Flag type e.g. "first", "omni", "abs"')
    parser.add_argument('-d', '--dist', required=True, metavar='D', type=str,
                        help='Fitting distribution for calibration '
                             '{"cauchy", "gaussian"}')
    parser.add_argument('-m', '--method', required=False, default='cartesian',
                        metavar='M', type=str,
                        help='Method to use - {"cartesian", "polar", "RP"}, '
                             'where RP stands for reduced parameters')
    parser.add_argument('-l', '--logamp', required=False, action='store_true',
                        help='Use logamp method to force positive gain amplitudes')
    parser.add_argument('-g', '--tilt_reg', required=False, action='store_true',
                        help='Add regularization term to constrain tilt shifts to 0')
    parser.add_argument('-a', '--gphase_reg', required=False, action='store_true',
                        help='Add regularization term to constrain the gain phase mean')
    parser.add_argument('-i', '--initp_jd', required=False, default=None,
                        metavar='I', type=int,
                        help='JD of the dataset whose results are reused as '
                             'initial parameters')
    parser.add_argument('-v', '--noise', required=False, action='store_true',
                        help='Use noise from autos in nlogL calculations')
    parser.add_argument('-u', '--out_dir', required=False, default=None,
                        metavar='U', type=str,
                        help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    out_fn = args.out
    default_fn = 'rel_df.{}.{}.{}'.format(args.jd_time, args.pol, args.dist)
    if out_fn is None:
        out_fn = default_fn
    if args.out_dir is not None:
        if not os.path.exists(args.out_dir):
            os.mkdir(args.out_dir)
        out_fn = os.path.join(args.out_dir, out_fn)
        default_fn = os.path.join(args.out_dir, default_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if csv_exists or pkl_exists:
        if args.new_df:
            out_csv = new_fn(out_csv, None, startTime)
            out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
            csv_exists = False
            pkl_exists = False

    zen_fn = find_zen_file(args.jd_time)
    bad_ants = get_bad_ants(zen_fn)

    flag_type = args.flag_type
    if flag_type is not None:
        flag_fn = find_flag_file(args.jd_time, flag_type)
    else:
        flag_fn = None

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    hd = HERAData(zen_fn)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(hd.Nfreqs - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(hd.Ntimes - 1)
    print('Running relative redundant calibration on visibility dataset {} for '\
          'polarization {}, frequency channel(s) {} and time integration(s) {} '\
          'with {} assumed noise distribution\n'.\
          format(os.path.basename(zen_fn), args.pol, pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(hd.Nfreqs)
    if time_ints is None:
        time_ints = numpy.arange(hd.Ntimes)

    indices = ['freq', 'time_int']

    no_tints = len(time_ints)
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    skip_cal = False
    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        cmap_f = dict(map(reversed, enumerate(freq_chans)))
        cmap_t = dict(map(reversed, enumerate(time_ints)))
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.index.values
        done = [(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr if (f in freq_chans \
        and t in time_ints)]
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not any(iter_dims):
            print('Solutions to all specified frequency channels and time '\
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        grp = group_data(zen_fn, args.pol, freq_chans, time_ints, \
                         bad_ants, flag_path=flag_fn, noise=args.noise)
        if not args.noise:
            _, RedG, cData = grp
            noisec = None
        else:
            _, RedG, cData, cNData = grp

        flags = cData.mask
        cData = cData.data

        # to get fields for the csv header
        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
        psize = (no_ants + no_unq_bls) * 2
        if args.tilt_reg:
            ant_pos_arr = flt_ant_pos(hd.antpos, ants)
        else:
            ant_pos_arr = None

        # discarding 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        # remove flagged channels from iter_dims
        if flags.any():
            flg_chans = numpy.where(flags.all(axis=(1, 2)))[0]  # indices
            print('Flagged channels for visibility dataset {} are: {}\n'.\
                 format(os.path.basename(zen_fn), freq_chans[flg_chans]))
            iter_dims = [
                idim for idim in iter_dims if idim[0] not in flg_chans
            ]
            if not iter_dims:  # check if slices to solve are empty
                print('All specified channels are flagged. Exiting.')
                sys.exit()

        if args.initp_jd is not None:
            jd_time2 = match_lst(args.jd_time, args.initp_jd)
            if len(str(jd_time2)) < 13:
                # add back a trailing 0 that float conversion drops
                jd_time2 = str(jd_time2) + '0'
            rel_df_path1 = find_rel_df(jd_time2, args.pol, args.dist)
            if isinstance(jd_time2, str):
                jd_time2 = float(jd_time2)

            last_df = pd.read_pickle('jd_lst_map_idr2.pkl')
            last1 = last_df[last_df['JD_time'] == float(
                args.jd_time)]['LASTs'].values[0]
            last2 = last_df[last_df['JD_time'] == jd_time2]['LASTs'].values[0]
            _, offset = find_nearest(last2, last1[0])

            rel_df1 = pd.read_pickle(rel_df_path1)
            rel_df1 = rel_df1[
                rel_df1.index.get_level_values('time_int') >= offset]

            next_row = numpy.where(last_df['JD_time'] == jd_time2)[0][0] + 1
            rel_df_path2 = find_rel_df(last_df.iloc[next_row]['JD_time'], args.pol, \
                                       args.dist)
            rel_df2 = pd.read_pickle(rel_df_path2)
            rel_df2 = rel_df2[
                rel_df2.index.get_level_values('time_int') < offset]

            rel_df_c = pd.concat([rel_df1, rel_df2])

            # filter by specified channels and time integrations
            time_ints_offset = (time_ints + offset) % hd.Ntimes
            freq_flt = numpy.in1d(rel_df_c.index.get_level_values('freq'),
                                  freq_chans)
            tint_flt = numpy.in1d(rel_df_c.index.get_level_values('time_int'),
                                  time_ints_offset)
            rel_df_c = rel_df_c[freq_flt & tint_flt]

            time_ints2 = numpy.tile(
                rel_df_c.index.get_level_values('time_int').unique().values,
                freq_chans.size)
            iter_dims = [
                idim + (tint, ) for idim, tint in zip(iter_dims, time_ints2)
            ]

            phase_reg_initp = True
        else:
            phase_reg_initp = False


        def cal(credg, distribution, coords, no_unq_bls, no_ants, logamp, \
                tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCal: unconstrained
            minimizer using cartesian coordinates - this is the fastest solver

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood {'gaussian', 'cauchy'}
            :type distribution: str
            :param coords: Coordinate system in which gain and visibility parameters
            have been set up
            :type coords: str {"cartesian", "polar"}
            :param no_unq_bls: Number of unique baselines (equivalently the number of
            redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters is taken,
            such that only positive solutions can be returned. Only if coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates for the antennas
            in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and given time,
            reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and gains
            :type initp: ndarray, None

            :return: Optimization result for the solved antenna gains and true sky
            visibilities
            :rtype: Scipy optimization result object
            """
            res_rel, initp_new = doRelCal(credg, obsvis, no_unq_bls, no_ants, \
                coords=coords, distribution=distribution, noise=noise, \
                norm_gains=True, logamp=logamp, tilt_reg=tilt_reg, \
                gphase_reg=gphase_reg, ant_pos_arr=ant_pos_arr, initp=initp, \
                return_initp=True, phase_reg_initp=phase_reg_initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_new
            return res_rel, initp

        def cal_RP(credg, distribution, no_unq_bls, no_ants, logamp, \
                   tilt_reg, gphase_reg, ant_pos_arr, obsvis, noise, initp):
            """Relative redundant calibration with doRelCalRP: constrained
            minimizer (by reducing the number of parameters) using polar
            coordinates

            :param credg: Grouped baselines, condensed so that antennas are
            consecutively labelled. See relabelAnts
            :type credg: ndarray
            :param distribution: Distribution to fit likelihood {'gaussian', 'cauchy'}
            :type distribution: str
            :param no_unq_bls: Number of unique baselines (equivalently the number of
            redundant visibilities)
            :type no_unq_bls: int
            :param no_ants: Number of antennas for given observation
            :type no_ants: int
            :param logamp: The logarithm of the amplitude initial parameters is taken,
            such that only positive solutions can be returned. Only if coords=="polar".
            :type logamp: bool
            :param tilt_reg: Add regularization term to constrain tilt shifts to 0
            :type tilt_reg: bool
            :param gphase_reg: Add regularization term to constrain the gain phase mean
            :type gphase_reg: bool
            :param ant_pos_arr: Array of filtered antenna position coordinates for the antennas
            in ants. See flt_ant_pos.
            :type ant_pos_arr: ndarray
            :param obsvis: Observed sky visibilities for a given frequency and given time,
            reformatted to have format consistent with redg
            :type obsvis: ndarray
            :param noise: Noise array to feed into log-likelihood calculations
            :type noise: ndarray
            :param initp: Initial parameter guesses for true visibilities and gains
            :type initp: ndarray, None

            :return: Optimization result for the solved antenna gains and true sky
            visibilities
            :rtype: Scipy optimization result object
            """
            res_rel, initp_ = doRelCalRP(credg, obsvis, no_unq_bls, no_ants, \
                distribution=distribution, noise=noise, constr_phase=True, \
                amp_constr='prod', bounded=True, logamp=logamp, tilt_reg=tilt_reg, \
                gphase_reg=gphase_reg, ant_pos_arr=ant_pos_arr, initp=initp)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_
            return res_rel, initp

        if args.method.upper() == 'RP':
            RelCal = functools.partial(cal_RP, cRedG, args.dist, no_unq_bls, no_ants, \
                                       args.logamp, args.tilt_reg, args.gphase_reg, \
                                       ant_pos_arr)
            coords = 'polar'
        else:
            RelCal = functools.partial(cal, cRedG, args.dist, args.method, no_unq_bls, \
                                       no_ants, args.logamp, args.tilt_reg, \
                                       args.gphase_reg, ant_pos_arr)
            coords = args.method

        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            with open(out_csv, 'a') as f:  # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                for i, iter_dim in enumerate(iter_dims):
                    if args.initp_jd is not None:
                        initp = rel_df_c.loc[(freq_chans[iter_dim[0]], iter_dim[2])]\
                                [len(slct_keys[:-1]):-2].values.astype(float)
                    if args.noise:
                        noisec = cNData[iter_dim[:2]]
                    res_rel, initp = RelCal(cData[iter_dim[:2]], noisec, initp)
                    # expanding out the solution
                    for j, param in enumerate(res_rel['x']):
                        res_rel[j] = param
                    # reset initp after each frequency slice
                    if not (i + 1) % no_tints and args.initp_jd is None:
                        initp = None
                    del res_rel['x']
                    res_rel.update({indices[0]:freq_chans[iter_dim[0]], \
                                    indices[1]:time_ints[iter_dim[1]]})
                    writer.writerow(res_rel)

        print('Relative calibration results saved to csv file {}'.format(
            out_csv))
        df = pd.read_csv(out_csv)
        if csv_exists:
            freqs = df['freq'].unique()
            tints = df['time_int'].unique()
            if cData.shape[0] != freqs.size or cData.shape[1] != tints.size:
                _, _, cData = group_data(zen_fn, args.pol, freqs, tints, \
                                         bad_ants, flag_path=flag_fn)
                cData = cData.data
        df.set_index(indices, inplace=True)
        # we now append the residuals as additional columns
        df = append_residuals_rel(df, cData, cRedG, coords, out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        df.to_pickle(out_pkl)
        print('Relative calibration results dataframe pickled to {}'.format(
            out_pkl))

        # creating metadata file
        out_md = default_fn.rsplit('.', 1)[0] + '.md.pkl'
        if not os.path.exists(out_md):
            md = {'no_ants':no_ants, 'no_unq_bls':no_unq_bls, 'redg':RedG, \
                  'antpos':hd.antpos, 'last':hd.lsts, 'Nfreqs':hd.Nfreqs, \
                  'Ntimes':hd.Ntimes}
            with open(out_md, 'wb') as f:
                pickle.dump(md, f, protocol=pickle.HIGHEST_PROTOCOL)
            print(
                'Relative calibration metadata pickled to {}\n'.format(out_md))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
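
For reference, a hypothetical invocation of this script (the script name and channel range are illustrative; the output names follow the default_fn pattern built above):

# python rel_cal.py 2458098.43869 -p ee -d cauchy -m cartesian -c 600~799
# -> writes rel_df.2458098.43869.ee.cauchy.csv / .pkl, plus
#    rel_df.2458098.43869.ee.cauchy.md.pkl with the metadata.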
Example #9
def XDgroup_data(JD_time, JDs, pol, chans=None, tints=None, bad_ants=True, \
                 use_flags='first', noise=False, use_cal=None, verbose=False):
    """Returns redundant baseline grouping and reformatted dataset, with
    external flags applied, if specified

    :param JD_time: Julian time of 1st dataset, which sets times for others
    :type JD_time: str
    :param JDs: Julian days of data
    :type JDs: list, ndarray
    :param pol: Polarization of data
    :type pol: str
    :param chans: Frequency channel(s) {0, 1023} (None to choose all)
    :type chans: array-like, int, or None
    :param tints: Time integrations {0, 59} (None to choose all)
    :type tints: array-like, int, or None
    :param bad_ants: Flag known bad antennas, optional
    :type bad_ants: bool
    :param use_flags: Use flags to mask data
    :type use_flags: str
    :param noise: Also calculate noise from autocorrelations
    :type noise: bool
    :param use_cal: calfits file extension to use to calibrate data
    :type use_cal: str, None
    :param verbose: Print data gathering steps for each dataset
    :type verbose: bool

    :return hd: HERAData class
    :rtype hd: HERAData class
    :return redg: Grouped baselines, as returned by groupBls
    :rtype redg: ndarray
    :return cdata: Grouped visibilities with flags in numpy MaskedArray format,
    with format consistent with redg and dimensions (days, freq chans,
    time integrations, baselines)
    :rtype cdata: MaskedArray
    :return cnoise: Grouped noise estimates with the same dimensions as cdata;
    only returned if noise is True
    :rtype cnoise: ndarray
    """

    if isinstance(chans, int):
        chans = np.asarray([chans])
    if isinstance(tints, int):
        tints = np.asarray([tints])

    zen_fn = find_zen_file(JD_time)
    flags_fn = find_flag_file(JD_time, use_flags)

    hd = HERAData(zen_fn)
    if tints is None:
        tints = np.arange(hd.Ntimes)

    if bad_ants:
        bad_ants = union_bad_ants(JDs)
    else:
        bad_ants = None

    if use_cal is None:
        cal_path = None
    else:
        cal_path = find_flag_file(JD_time, use_cal)

    if not verbose:
        grp_data = suppressOutput(group_data)
    else:
        grp_data = group_data

    grp = grp_data(zen_fn,
                   pol,
                   chans=chans,
                   tints=tints,
                   bad_ants=bad_ants,
                   flag_path=flags_fn,
                   noise=noise,
                   cal_path=cal_path)
    _, redg, cMData = grp[:3]

    cMData = cMData[np.newaxis, :]
    if noise:
        cNoise = grp[3]
        cNoise = cNoise[np.newaxis, :]

    JD_day = int(float(JD_time))
    if JD_day in JDs:
        JDs = list(JDs)
        JDs.remove(JD_day)

    for jd_i in JDs:
        JD_time_ia = match_lst(JD_time, jd_i)
        # aligning datasets in LAST
        last_df = pd.read_pickle(
            os.path.join(os.path.dirname(__file__), 'jd_lst_map_idr2.pkl'))
        last1 = last_df[last_df['JD_time'] == float(
            JD_time)]['LASTs'].values[0]
        last2 = last_df[last_df['JD_time'] == float(
            JD_time_ia)]['LASTs'].values[0]
        _, offset = find_nearest(last2, last1[0])
        tints_i = (tints + offset) % hd.Ntimes  # wrap time indices into the next dataset
        scnd_dataset = all(tints + offset > hd.Ntimes - 1)
        single_dataset = all(tints + offset < hd.Ntimes - 1) or scnd_dataset

        if not single_dataset:
            tints_ia, tints_ib = np.split(tints_i, np.where(tints_i == 0)[0])
        else:
            tints_ia = tints_i

        if scnd_dataset:
            next_row = numpy.where(
                last_df['JD_time'] == float(JD_time_ia))[0][0] + 1
            JD_time_ib = last_df.iloc[next_row]['JD_time']
            JD_time_ia = JD_time_ib

        JD_time_ia = check_jdt(JD_time_ia)
        zen_fn_ia = find_zen_file(JD_time_ia)
        flags_fn_ia = find_flag_file(JD_time_ia, use_flags)
        if use_cal is not None:
            cal_path_ia = find_flag_file(JD_time_ia, use_cal)
        else:
            cal_path_ia = None
        grp_a = grp_data(zen_fn_ia, pol, chans=chans, tints=tints_ia, \
                         bad_ants=bad_ants, flag_path=flags_fn_ia, noise=noise, \
                         cal_path=cal_path_ia)
        cMData_ia = grp_a[2]

        if not single_dataset:
            next_row = numpy.where(
                last_df['JD_time'] == float(JD_time_ia))[0][0] + 1
            JD_time_ib = last_df.iloc[next_row]['JD_time']
            JD_time_ib = check_jdt(JD_time_ib)
            zen_fn_ib = find_zen_file(JD_time_ib)
            flags_fn_ib = find_flag_file(JD_time_ib, use_flags)
            if use_cal is not None:
                cal_path_ib = find_flag_file(JD_time_ib, use_cal)
            else:
                cal_path_ib = None
            grp_b = grp_data(zen_fn_ib, pol, chans=chans, tints=tints_ib, \
                             bad_ants=bad_ants, flag_path=flags_fn_ib, \
                             noise=noise, cal_path=cal_path_ib)
            cMData_ib = grp_b[2]

            cMData_i = numpy.ma.concatenate((cMData_ia, cMData_ib), axis=1)
        else:
            cMData_i = cMData_ia

        cMData_i = cMData_i[np.newaxis, :]
        cMData = numpy.ma.concatenate((cMData, cMData_i), axis=0)

        if noise:
            cNoise_ia = grp_a[3]
            if not single_dataset:
                cNoise_ib = grp_b[3]
                cNoise_i = np.concatenate((cNoise_ia, cNoise_ib), axis=1)
            else:
                cNoise_i = cNoise_ia
            cNoise_i = cNoise_i[np.newaxis, :]
            cNoise = np.concatenate((cNoise, cNoise_i), axis=0)

    if noise:
        return hd, redg, cMData, cNoise
    else:
        return hd, redg, cMData
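
A usage sketch (JD values hypothetical): align and stack the 'ee' data of several Julian days in LAST for a channel range.

# Hypothetical usage across three days, channels 600-799, all time
# integrations, with known bad antennas flagged.
import numpy as np
hd, redg, cMData = XDgroup_data('2458098.43869', [2458098, 2458099, 2458101],
                                'ee', chans=np.arange(600, 800), tints=None,
                                bad_ants=True, use_flags='first')
print(cMData.shape)  # (days, channels, time integrations, baselines)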
Example #10
def generate_residual_IDR2_2(uvh5_file,
                             omni_vis,
                             omni_calfits,
                             abs_calfits,
                             outfile,
                             clobber=False):
    """Generate residual visibilities by subtracting the rescaled omnical model
    from the abscal-calibrated data, and write them to a uvh5 file."""
    # reading uvh5 data file
    hd = HERAData(uvh5_file)
    data, flags, nsamples = hd.read(polarizations=['ee', 'nn'])

    # reading calfits file
    hc = HERACal(omni_calfits)
    oc_gains, oc_flags, oc_quals, oc_total_quals = hc.read()

    hc = HERACal(abs_calfits)
    ac_gains, ac_flags, ac_quals, ac_total_quals = hc.read()

    # calibrating the data
    abscal_data, abscal_flags = copy.deepcopy(data), copy.deepcopy(flags)
    calibrate_in_place(abscal_data,
                       ac_gains,
                       data_flags=abscal_flags,
                       cal_flags=ac_flags)

    res_data, res_flags = copy.deepcopy(hd.data_array), copy.deepcopy(
        hd.flag_array)
    resdata, resflags = copy.deepcopy(abscal_data), copy.deepcopy(abscal_flags)
    for i, p in enumerate(['ee', 'nn']):
        # reading omnical model visibilities
        hd_oc = HERAData(omni_vis)
        omnivis, omnivis_flags, _ = hd_oc.read(polarizations=[p])
        mod_bls = list(omnivis.keys())
        red_bls = get_reds(hd.antpos, pols=p)
        red = gr.RBL(red_bls)
        for mbl in mod_bls:
            bl_grp = red[tuple(mbl[0:2]) + ('J{}'.format(p), )]
            for blp in bl_grp:
                bl = (blp[0], blp[1], p)
                inds = hd.antpair2ind(bl)
                omnivis_scaled = omnivis[mbl] * oc_gains[(blp[0], 'J{}'.format(
                    p))] * np.conj(oc_gains[(blp[1], 'J{}'.format(p))])
                omnivis_scaled /= (
                    ac_gains[(blp[0], 'J{}'.format(p))] *
                    np.conj(ac_gains[(blp[1], 'J{}'.format(p))]))
                resdata[bl] = abscal_data[bl] - omnivis_scaled
                resflags[bl] = abscal_flags[bl]
                res_data[inds, 0, :, i] = resdata[bl]
                res_flags[inds, 0, :, i] = resflags[bl]

    # writing to file
    hd.data_array = res_data
    hd.flag_array = res_flags
    hd.write_uvh5(outfile, clobber=clobber)
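
A usage sketch with hypothetical IDR2-style file names:

# Hypothetical call: write abscal-minus-omnical residuals to a new uvh5 file.
generate_residual_IDR2_2('zen.2458098.43869.HH.uvh5',
                         'zen.2458098.43869.HH.omni_vis.uvh5',
                         'zen.2458098.43869.HH.omni.calfits',
                         'zen.2458098.43869.HH.abs.calfits',
                         'zen.2458098.43869.HH.res.uvh5',
                         clobber=True)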
Example #11
data_file = data_directory
model_file = model_directory
flag_files = [f"/lustre/aoc/projects/hera/aewallwi/H1C_flags/{jd}.flags.h5" for jd in [2458098,2458099,2458101,2458102,2458103,2458104,2458105,2458106,
                                                                                                        2458107,2458108,2458109,2458110,2458111,2458112,2458113,2458114,
                                                                                                        2458115,2458116]]
bad_ants = [np.loadtxt(f"/users/kshahin/kshahin/HERA_Calibration/hera_pipelines/pipelines/h1c/idr2/v2/bad_ants/{ba}.txt") for ba in [2458098,2458099,2458101,2458102,2458103,2458104,2458105,2458106,2458107,2458108,
                                                                                                                          2458109,2458110,2458111,2458112,2458113,2458114,2458115,2458116]]
flag_file = flag_files[day]
bad_ant = bad_ants[day]
if not os.path.exists(f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}"):
    os.mkdir(f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}")
flags = UVFlag(flag_file)
flags.select(frequencies=flags.freq_array[(flags.freq_array >= 115e6) & (flags.freq_array < 175e6)])
flags.select(times=flags.time_array[2600:2660])
hd_data = HERAData(data_file)
freqs = hd_data.freqs[(hd_data.freqs >= 115e6) & (hd_data.freqs < 175e6)]
data, flag, nsample = hd_data.read(polarizations=["ee"], frequencies=freqs)
for bl in data:
    # fully flag baselines that involve a known bad antenna
    if (bl[0] == bad_ant).any() or (bl[1] == bad_ant).any():
        flag[bl] = np.ones_like(flag[bl])
    # apply the external flags on top of the existing ones
    flag[bl] = flag[bl] | flags.flag_array.squeeze()
hd_data.update(flags=flag)
hd_data.write_uvh5(f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_{chunk}.uvh5", clobber=True)
del data, flag, nsample, hd_data
redcal.redcal_run(input_data=f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_{chunk}.uvh5", clobber=True, solar_horizon=90, verbose=True)
abscal.post_redcal_abscal_run(data_file=f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_{chunk}.uvh5", redcal_file=f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_{chunk}.omni.calfits", model_files=[data_file], clobber=True, data_solar_horizon=90, model_solar_horizon=90)

cs=smooth_cal.CalibrationSmoother(calfits_list=sorted(glob.glob(f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_*.abs.calfits")))
cs.time_freq_2D_filter(time_scale=21600)
cs.write_smoothed_cal(clobber=True, output_replace=(".abs.",".smooth_abs."))
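
To spot-check the smoothed solutions, one of the output calfits files can be read back with pyuvdata. A sketch, assuming the path pattern of the outputs above (the gain_array layout varies across pyuvdata versions):

from pyuvdata import UVCal

# sanity check: read back one smoothed calibration solution
uvc = UVCal()
uvc.read_calfits(f"/users/kshahin/kshahin/HERA_Calibration/DayfxP_{day}/data_{day}_{chunk}.smooth_abs.calfits")
print(uvc.gain_array.shape)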
Example No. 12
def ant_metrics_run(files, pols=['xx', 'yy', 'xy', 'yx'], crossCut=5.0,
                    deadCut=5.0, alwaysDeadCut=10.0, metrics_path='',
                    extension='.ant_metrics.hdf5', vis_format='miriad',
                    verbose=True, history='',
                    run_mean_vij=True, run_red_corr=True,
                    run_cross_pols=True):
    """
    Run a series of ant_metrics tests on a given set of input files.

    Args:
        files: List of files to run ant metrics on.
               Can be any of the 4 polarizations
        pols: List of polarizations to perform metrics over.
             Allowed polarizations: 'xx', 'yy', 'xy', 'yx'
             Default: ['xx', 'yy', 'xy', 'yx']
        crossCut: Modified Z-Score limit to cut cross-polarized antennas.
                  Default: 5.0
        deadCut: Modified Z-Score limit to cut dead antennas.
                 Default: 5.0
        alwaysDeadCut: Modified Z-Score limit for antennas that are definitely dead.
                       Antennas with scores above this limit are thrown away before iterative flagging.
                       Default: 10.0
        metrics_path: String path to directory to store output metrics.
                      Default: same directory as input data files.
        extension: File extension to add to output files.
                   Default: '.ant_metrics.hdf5'
        vis_format: File format of input visibility data.
                    Supports: 'miriad', 'uvfits', 'fhd', 'ms' (see pyuvdata docs)
                    Default: 'miriad'
        verbose: If True, print out statements during iterative flagging.
        history: The history to add to metrics.
                 Default: ''
        run_mean_vij: Boolean flag which determines if mean_Vij_metrics is executed.
                      Default is True
        run_red_corr: Boolean flag which determines if red_corr_metrics is executed.
                      Default is True
        run_cross_pols: Boolean flag which determines if mean_Vij_cross_pol_metrics and red_corr_cross_pol_metrics are executed.
                      Default is True
    Returns:
       None

    The function will take in a list of files and options.
    It will run the series of ant metrics tests,
    and produce an HDF5 file containing the relevant information.
    The file list need only contain one polarization type for a given JD,
    the function will look for the other polarizations in the same folder.
    If not all four polarizations are found, a warning is
    generated, since the code assumes all four polarizations are present.
    """
    from hera_cal.omni import aa_to_info
    from hera_cal.utils import get_aa_from_uv
    from hera_cal.io import HERAData

    # check the user asked to run anything
    if not any([run_mean_vij, run_red_corr, run_cross_pols]):
        raise AssertionError("No Ant Metrics have been selected to run. "
                             "Please set the correct keywords to run "
                             "the desired metrics.")

    # check that we were given some files to process
    if len(files) == 0:
        raise AssertionError('Please provide a list of visibility files')

    # generate a list of all files to be read in
    fullpol_file_list = utils.generate_fullpol_file_list(files, pols)
    if len(fullpol_file_list) == 0:
        raise AssertionError('Could not find all 4 polarizations '
                             'for any files provided')

    # generate aa object from file
    # N.B.: assumes redundancy information is the same for all files passed in
    first_file = fullpol_file_list[0][0]
    hd = HERAData(first_file, filetype='miriad')
    data, flags, nsamples = hd.read()
    aa = get_aa_from_uv(hd)
    del hd

    info = aa_to_info(aa, pols=[pols[-1][0]])
    reds = info.get_reds()

    # do the work
    for jd_list in fullpol_file_list:
        am = AntennaMetrics(jd_list, reds, fileformat=vis_format)
        am.iterative_antenna_metrics_and_flagging(crossCut=crossCut,
                                                  deadCut=deadCut,
                                                  alwaysDeadCut=alwaysDeadCut,
                                                  verbose=verbose,
                                                  run_mean_vij=run_mean_vij,
                                                  run_red_corr=run_red_corr,
                                                  run_cross_pols=run_cross_pols)

        # add history
        am.history = am.history + history

        base_filename = jd_list[0]
        abspath = os.path.abspath(base_filename)
        dirname = os.path.dirname(abspath)
        basename = os.path.basename(base_filename)
        nopol_filename = re.sub(r'\.{}\.'.format(pols[0]), '.', basename)
        if metrics_path == '':
            # default path is the same directory as the input file; use a local
            # name so each file in the loop keeps its own directory
            metrics_dirname = dirname
        else:
            metrics_dirname = metrics_path
        metrics_basename = nopol_filename + extension
        metrics_filename = os.path.join(metrics_dirname, metrics_basename)
        am.save_antenna_metrics(metrics_filename)

    return
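
A minimal invocation of this wrapper might look like the following sketch (the file name is hypothetical; the remaining three polarization files are discovered automatically in the same directory):

files = ['zen.2458098.12345.xx.HH.uv']  # hypothetical miriad file for one JD
ant_metrics_run(files, pols=['xx', 'yy', 'xy', 'yx'], crossCut=5.0,
                deadCut=5.0, alwaysDeadCut=10.0, vis_format='miriad',
                verbose=True)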
Example No. 13
class AntennaMetrics():
    """Container for holding data and meta-data for ant metrics calculations.

    Object for holding relevant visibility data and metadata
    interfaces to four antenna metrics:
        Two each for identifying dead or cross-polarized antennas.
    Can iteratively identifying bad antennas
    Handles all metrics stroage, and supports writing metrics to HDF5.
    Works on raw data from a single observation with all four polarizations.
    """

    def __init__(self, dataFileList, reds, fileformat='miriad'):
        """Initilize an AntennaMetrics object.

        Arguments:
            dataFileList: List of data filenames of the four different visibility polarizations for the same observation
            reds: List of lists of tuples of antenna numbers that make up redundant baseline groups.
            format: File type of data
                    Supports: 'miriad','uvfits', 'fhd', 'ms ' (see pyuvdata docs)
                    Default: 'miriad'.
        """
        from hera_cal.io import HERAData

        if fileformat == 'miriad':
            self.hd = HERAData(dataFileList, filetype='miriad')
        elif fileformat == 'uvfits':
            self.hd = HERAData(dataFileList, filetype='uvfits')
        elif fileformat == 'fhd':
            raise NotImplementedError(str(fileformat) + ' not supported')
        else:
            raise ValueError('Unrecognized file format ' + str(fileformat))

        self.data, self.flags, self.nsamples = self.hd.read()
        self.ants = self.hd.get_ants()
        self.pols = [pol.lower() for pol in self.hd.get_pols()]
        self.antpols = [antpol.lower() for antpol in self.hd.get_feedpols()]
        self.bls = self.hd.get_antpairs()
        self.dataFileList = dataFileList
        self.reds = reds
        self.version_str = hera_qm_version_str
        self.history = ''

        if len(self.antpols) != 2 or len(self.pols) != 4:
            raise ValueError('Missing polarization information. pols ='
                             + str(self.pols) + ' and antpols = '
                             + str(self.antpols))

    def mean_Vij_metrics(self, pols=None, xants=[], rawMetric=False):
        """Calculate how an antennas's average |Vij| deviates from others.

        Local wrapper for mean_Vij_metrics in hera_qm.ant_metrics module
        Arguments:

            pols : List of visibility polarizations (e.g. ['xx','xy','yx','yy'])
                   Default: self.pols
            xants: List of antennas ithat should be ignored.
                   format: (ant,antpol)
            rawMetric:return the raw mean Vij metric instead of the modified z-score

        Returns:
            meanMetrics: Dictionary indexed by (ant,antpol) of the modified z-score
                         of the mean of the absolute value of all visibilities associated with an antenna.
                         Very small or very large numbers are probably bad antennas.
        """
        if pols is None:
            pols = self.pols
        return mean_Vij_metrics(self.data, pols, self.antpols,
                                self.ants, self.bls, xants=xants,
                                rawMetric=rawMetric)

    def red_corr_metrics(self, pols=None, xants=[], rawMetric=False,
                         crossPol=False):
        """Calculate modified Z-Score over all redundant groups for each antenna.

        Local wrapper for red_corr_metrics in hera_qm.ant_metrics module.
        Calculates the extent to which baselines involving an antenna
        do not correlate with others they are nominally redundant with.

        Arguments:
            pols: List of visibility polarizations (e.g. ['xx','xy','yx','yy']).
                  Default: self.pols
            xants: List of antennas that should be ignored.
                   format: (ant, antpol)
            rawMetric: Return the raw power correlations instead of the modified z-score.
            crossPol: Return results only when the two visibility polarizations differ by a single flip.

        Returns:
            powerRedMetric: Dictionary indexed by (ant,antpol)
                            of the modified z-scores of the mean power correlations
                            inside redundant baseline groups
                            associated with each antenna.
                            Very small numbers are probably bad antennas.
        """
        if pols is None:
            pols = self.pols
        return red_corr_metrics(self.data, pols, self.antpols,
                                self.ants, self.reds, xants=xants,
                                rawMetric=rawMetric, crossPol=crossPol)

    def mean_Vij_cross_pol_metrics(self, xants=[], rawMetric=False):
        """Calculate the ratio of cross-pol visibilities to same-pol visibilities.

        Local wrapper for mean_Vij_cross_pol_metrics.
        Find which antennas are outliers based on the
        ratio of mean cross-pol visibilities to mean same-pol visibilities:
        (Vxy+Vyx)/(Vxx+Vyy).

        Arguments:
            xants: List of antennas that should be ignored.
                   format: (ant, antpol)
                   e.g., if (81,'y') is excluded, (81,'x') cannot be identified
                         as cross-polarized and will be excluded.
            rawMetric: Return the raw power ratio instead of the modified z-score.

        Returns:
            mean_Vij_cross_pol_metrics: Dictionary indexed by (ant,antpol)
                                        The modified z-scores of the
                                        ratio of mean visibilities,
                                        (Vxy+Vyx)/(Vxx+Vyy).
                                        Results duplicated in both antpols.
                                        Very large values are likely cross-polarized.
        """
        return mean_Vij_cross_pol_metrics(self.data, self.pols,
                                          self.antpols, self.ants,
                                          self.bls, xants=xants,
                                          rawMetric=rawMetric)

    def red_corr_cross_pol_metrics(self, xants=[], rawMetric=False):
        """Calculate modified Z-Score over redundant groups; assume cross-polarized.

        Local wrapper for red_corr_cross_pol_metrics.
        Find which antennas are part of visibilities that are significantly better
        correlated with polarization-flipped visibilities in a redundant group.
        Returns the modified z-score.

        Arguments:
            xants: List of antennas that should be ignored.
                   format: (ant, antpol)
                   e.g., if (81,'y') is excluded, (81,'x') cannot be identified
                         as cross-polarized and will be excluded.
            rawMetric: Return the raw power ratio instead of the modified z-score.
                       type: Boolean
                       Default: False

        Returns:
            redCorrCrossPolMetrics: Dictionary indexed by (ant,antpol)
                                    The modified z-scores of the mean correlation
                                    ratio between redundant visibilities
                                    and singly polarization-flipped ones.
                                    Very large values are probably cross-polarized.
        """
        return red_corr_cross_pol_metrics(self.data, self.pols,
                                          self.antpols, self.ants,
                                          self.reds, xants=xants,
                                          rawMetric=rawMetric)

    def reset_summary_stats(self):
        """Reset all the internal summary statistics back to empty."""
        self.xants, self.crossedAntsRemoved, self.deadAntsRemoved = [], [], []
        self.iter = 0
        self.removalIter = {}
        self.allMetrics, self.allModzScores = OrderedDict(), OrderedDict()
        self.finalMetrics, self.finalModzScores = {}, {}

    def find_totally_dead_ants(self):
        """Flag antennas whose median autoPower is 0.0.

        These antennas are marked as dead.
        They do not appear in recorded antenna metrics or z-scores.
        Their removal iteration is -1 (i.e., before iterative flagging).
        """
        autoPowers = compute_median_auto_power_dict(self.data,
                                                    self.pols,
                                                    self.reds)
        power_list_by_ant = {(ant, antpol): []
                             for ant in self.ants
                             for antpol in self.antpols
                             if (ant, antpol) not in self.xants}
        for ((ant0, ant1, pol), power) in autoPowers.items():
            if ((ant0, pol[0]) not in self.xants
                    and (ant1, pol[1]) not in self.xants):
                power_list_by_ant[(ant0, pol[0])].append(power)
                power_list_by_ant[(ant1, pol[1])].append(power)
        for (key, val) in power_list_by_ant.items():
            if np.median(val) == 0:
                self.xants.append(key)
                self.deadAntsRemoved.append(key)
                self.removalIter[key] = -1

    def _run_all_metrics(self, run_mean_vij=True, run_red_corr=True,
                         run_cross_pols=True):
        """Local call for all metrics as part of iterative flagging method.

        Arguments:
            run_mean_vij: Boolean flag which determines if mean_Vij_metrics is executed.
                          Default is True
            run_red_corr: Boolean flag which determines if red_corr_metrics is executed.
                          Default is True
            run_cross_pols: Boolean flag which determines if mean_Vij_cross_pol_metrics and red_corr_cross_pol_metrics are executed.
                          Default is True
        """
        # Compute all raw metrics
        metNames = []
        metVals = []

        if run_mean_vij:
            metNames.append('meanVij')
            meanVij = self.mean_Vij_metrics(pols=self.pols,
                                            xants=self.xants,
                                            rawMetric=True)
            metVals.append(meanVij)

        if run_red_corr:
            metNames.append('redCorr')

            redCorr = self.red_corr_metrics(pols=['xx', 'yy'],
                                            xants=self.xants,
                                            rawMetric=True)
            metVals.append(redCorr)

        if run_cross_pols:
            metNames.append('meanVijXPol')
            metNames.append('redCorrXPol')

            meanVijXPol = self.mean_Vij_cross_pol_metrics(xants=self.xants,
                                                          rawMetric=True)
            redCorrXPol = self.red_corr_cross_pol_metrics(xants=self.xants,
                                                          rawMetric=True)
            metVals.append(meanVijXPol)
            metVals.append(redCorrXPol)

        # Save all metrics and zscores
        metrics, modzScores = {}, {}
        for metric, metName in zip(metVals, metNames):
            metrics[metName] = metric
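            # modified z-score: 0.6745 * (x - median) / median(|x - median|),
            # computed per antenna polarization; robust to outliers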
            modz = per_antenna_modified_z_scores(metric)
            modzScores[metName] = modz
            for key in metric:
                if metName in self.finalMetrics:
                    self.finalMetrics[metName][key] = metric[key]
                    self.finalModzScores[metName][key] = modz[key]
                else:
                    self.finalMetrics[metName] = {key: metric[key]}
                    self.finalModzScores[metName] = {key: modz[key]}
        self.allMetrics.update({self.iter: metrics})
        self.allModzScores.update({self.iter: modzScores})

    def iterative_antenna_metrics_and_flagging(self, crossCut=5, deadCut=5,
                                               alwaysDeadCut=10,
                                               verbose=False,
                                               run_mean_vij=True,
                                               run_red_corr=True,
                                               run_cross_pols=True):
        """Run all four antenna metrics and stores results in self.

        Runs all four metrics:
            Two for dead antennas
            Two for cross-polarized antennas
        Saves the results internally to this this antenna metrics object.

        Arguments:
            crossCut: Modified z-score cut for most cross-polarized antennas.
                      Default 5 "sigmas".
            deadCut: Modified z-score cut for most likely dead antennas.
                      Default 5 "sigmas".
            alwaysDeadCut: Modified z-score cut for definitely dead antennas.
                           Default 10 "sigmas".
                           These are all thrown away at once without waiting
                           to iteratively throw away only the worst offender.
            run_mean_vij: Boolean flag which determines if mean_Vij_metrics is executed.
                          Default is True
            run_red_corr: Boolean flag which determines if red_corr_metrics is executed.
                          Default is True
            run_cross_pols: Boolean flag which determines if mean_Vij_cross_pol_metrics and red_corr_cross_pol_metrics are executed.
                          Default is True
        """
        self.reset_summary_stats()
        self.find_totally_dead_ants()
        self.crossCut, self.deadCut = crossCut, deadCut
        self.alwaysDeadCut = alwaysDeadCut

        # Iteratively remove the worst antenna until no metric exceeds its cut
        for n in range(len(self.antpols) * len(self.ants)):
            self.iter = n
            self._run_all_metrics(run_mean_vij=run_mean_vij,
                                  run_red_corr=run_red_corr,
                                  run_cross_pols=run_cross_pols)

            # Most likely dead antenna
            last_iter = list(self.allModzScores)[-1]
            # -inf sentinels keep the comparisons below well-defined even when
            # a metric family is not run
            worstDeadCutRatio = -np.inf
            worstCrossCutRatio = -np.inf
            if run_mean_vij and run_red_corr:
                deadMetrics = average_abs_metrics(self.allModzScores[last_iter]['meanVij'],
                                                  self.allModzScores[last_iter]['redCorr'])
                worstDeadAnt = max(deadMetrics, key=deadMetrics.get)
                worstDeadCutRatio = np.abs(deadMetrics[worstDeadAnt]) / deadCut
            else:
                if run_mean_vij:
                    deadMetrics = self.allModzScores[last_iter]['meanVij'].copy()
                    worstDeadAnt = max(deadMetrics, key=deadMetrics.get)
                    worstDeadCutRatio = (np.abs(deadMetrics[worstDeadAnt])
                                         / deadCut)
                elif run_red_corr:
                    deadMetrics = self.allModzScores[last_iter]['redCorr'].copy()
                    worstDeadAnt = max(deadMetrics, key=deadMetrics.get)
                    worstDeadCutRatio = (np.abs(deadMetrics[worstDeadAnt])
                                         / deadCut)
            if run_cross_pols:
                # Most likely cross-polarized antenna
                crossMetrics = average_abs_metrics(self.allModzScores[last_iter]['meanVijXPol'],
                                                   self.allModzScores[last_iter]['redCorrXPol'])
                worstCrossAnt = max(crossMetrics, key=crossMetrics.get)
                worstCrossCutRatio = (np.abs(crossMetrics[worstCrossAnt])
                                      / crossCut)

            # Find the single worst antenna, remove it, log it, and run again
            if (worstCrossCutRatio >= worstDeadCutRatio
                    and worstCrossCutRatio >= 1.0):
                for antpol in self.antpols:
                    self.xants.append((worstCrossAnt[0], antpol))
                    self.crossedAntsRemoved.append((worstCrossAnt[0], antpol))
                    self.removalIter[(worstCrossAnt[0], antpol)] = n
                    if verbose:
                        print('On iteration', n, 'we flag\t', end='')
                        print((worstCrossAnt[0], antpol))
            elif (worstDeadCutRatio > worstCrossCutRatio
                    and worstDeadCutRatio > 1.0):
                dead_ants = set([worstDeadAnt])
                for (ant, metric) in deadMetrics.items():
                    if metric > alwaysDeadCut:
                        dead_ants.add(ant)
                for dead_ant in dead_ants:
                    self.xants.append(dead_ant)
                    self.deadAntsRemoved.append(dead_ant)
                    self.removalIter[dead_ant] = n
                    if verbose:
                        print('On iteration', n, 'we flag', dead_ant)
            else:
                break

    def save_antenna_metrics(self, filename):
        """Output all meta-metrics and cut decisions to HDF5 file.

        Saves all cut decisions and meta-metrics in an HDF5 that can be loaded
        back into a dictionary using hera_qm.ant_metrics.load_antenna_metrics()

        Arguments:
            filename: The file into which metrics will be written.
        """
        if not hasattr(self, 'xants'):
            raise KeyError(('Must run AntennaMetrics.'
                            'iterative_antenna_metrics_and_flagging() first.'))

        out_dict = {'xants': self.xants}
        out_dict['crossed_ants'] = self.crossedAntsRemoved
        out_dict['dead_ants'] = self.deadAntsRemoved
        out_dict['final_metrics'] = self.finalMetrics
        out_dict['all_metrics'] = self.allMetrics
        out_dict['final_mod_z_scores'] = self.finalModzScores
        out_dict['all_mod_z_scores'] = self.allModzScores
        out_dict['removal_iteration'] = self.removalIter
        out_dict['cross_pol_z_cut'] = self.crossCut
        out_dict['dead_ant_z_cut'] = self.deadCut
        out_dict['always_dead_ant_z_cut'] = self.alwaysDeadCut
        out_dict['datafile_list'] = self.dataFileList
        out_dict['reds'] = self.reds

        metrics_io.write_metric_file(filename, out_dict)
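
Putting the class together, a typical run might look like this sketch (file names are placeholders, and reds is assumed to come from a separate redundancy calculation):

dataFileList = ['zen.2458098.12345.xx.HH.uv', 'zen.2458098.12345.yy.HH.uv',
                'zen.2458098.12345.xy.HH.uv', 'zen.2458098.12345.yx.HH.uv']
am = AntennaMetrics(dataFileList, reds, fileformat='miriad')
am.iterative_antenna_metrics_and_flagging(crossCut=5.0, deadCut=5.0, alwaysDeadCut=10.0)
am.save_antenna_metrics('zen.2458098.12345.HH.ant_metrics.hdf5')
print(am.xants)  # antennas flagged as dead or cross-polarized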
Example No. 14
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
    Across-days relative redundant calibration of visibilities

    Takes HERA visibility datasets across several JDs in uvh5 file format,
    aligns them in LAST and then performs relative redundant calibration
    (up to the overall amplitude, overall phase, and phase gradient degenerate
    parameters) for each frequency channel and each time integration in the dataset.

    Returns a pickled pandas dataframe of the SciPy optimization results for
    the relative redundant calibration for each set of frequency channel and
    time integration.
    """))
    parser.add_argument('jd_time',
                        help='Fractional JD time of dataset to align other '
                             'dataframes to',
                        metavar='JD',
                        type=str)
    parser.add_argument('-j', '--jds', required=True, metavar='J',
                        type=str, help='JDs to calibrate')
    parser.add_argument('-p', '--pol', required=True, metavar='P', type=str,
                        help='Polarization {"ee", "en", "nn", "ne"}')
    parser.add_argument('-c', '--chans', required=False, default=None, metavar='C',
                        type=str, help='Frequency channels to calibrate {0, 1023}')
    parser.add_argument('-t', '--tints', required=False, default=None, metavar='T',
                        type=str, help='Time integrations to calibrate {0, 59}')
    parser.add_argument('-f', '--flag_type', required=False, default='first',
                        metavar='F', type=str,
                        help='Flag type e.g. "first", "omni", "abs"')
    parser.add_argument('-d', '--dist', required=True, metavar='D', type=str,
                        help='Fitting distribution for calibration '
                             '{"cauchy", "gaussian"}')
    parser.add_argument('-v', '--noise', required=False, action='store_true',
                        help='Use noise from autos in nlogL calculations')
    parser.add_argument('-cf', '--chan_flag_pct', required=False, default=None,
                        metavar='CFP', type=float,
                        help='Flag channel if more than X%% of day/time slices '
                             'for a given channel are flagged')
    parser.add_argument('-o', '--out', required=False, default=None,
                        metavar='O', type=str, help='Output csv and df name')
    parser.add_argument('-u', '--out_dir', required=False, default=None, metavar='U',
                        type=str, help='Out directory to store dataframe')
    parser.add_argument('-n', '--new_df', required=False, action='store_true',
                        help='Write data to a new dataframe')
    parser.add_argument('-k', '--compression', required=False, default=None, metavar='K',
                        type=str, help='Compression to use when pickling results dataframe')
    args = parser.parse_args()

    startTime = datetime.datetime.now()

    zen_fn = find_zen_file(args.jd_time)
    hd = HERAData(zen_fn)

    out_fn = args.out
    default_fn = 'xd_rel_df.{:.4f}.{}.{}'.format(hd.lsts[0], args.pol, args.dist)
    if out_fn is None:
        out_fn = default_fn
    if args.out_dir is not None:
        if not os.path.exists(args.out_dir):
            os.mkdir(args.out_dir)
        out_fn = os.path.join(args.out_dir, out_fn)
        default_fn = os.path.join(args.out_dir, default_fn)

    out_csv = fn_format(out_fn, 'csv')
    out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
    csv_exists = os.path.exists(out_csv)
    pkl_exists = os.path.exists(out_pkl)
    if csv_exists or pkl_exists:
        if args.new_df:
            out_csv = new_fn(out_csv, None, startTime)
            out_pkl = out_csv.rsplit('.', 1)[0] + '.pkl'
            csv_exists = False
            pkl_exists = False

    JDs = args.jds
    if JDs == 'idr2_jds':
        JDs = numpy.asarray(idr2_jds)
    elif JDs == 'idr2_jdsx':
        JDs = numpy.asarray(idr2_jdsx)
    else:
        if '_' in JDs:
            JDs = numpy.asarray(JDs.split('_'), dtype=int)
        else:
            JDs = mod_str_arg(JDs)
        JDs = numpy.intersect1d(JDs, idr2_jds)

    freq_chans = mod_str_arg(args.chans)
    time_ints = mod_str_arg(args.tints)

    pchans = args.chans
    if pchans is None:
        pchans = '0~{}'.format(hd.Nfreqs - 1)
    ptints = args.tints
    if ptints is None:
        ptints = '0~{}'.format(hd.Ntimes - 1)
    print('Running relative redundant calibration across JDs {} between LASTS '\
          '{:.4f} and {:.4f} for polarization {}, frequency channel(s) {} '\
          'and time integration(s) {}, with {} assumed noise distribution.\n'.\
          format(' '.join(map(str, JDs)), hd.lsts[0], hd.lsts[-1], args.pol, \
                 pchans, ptints, args.dist))

    if freq_chans is None:
        freq_chans = numpy.arange(hd.Nfreqs)
    if time_ints is None:
        time_ints = numpy.arange(hd.Ntimes)

    indices = ['freq', 'time_int']

    no_tints = len(time_ints)
    iter_dims = list(numpy.ndindex((len(freq_chans), no_tints)))
    skip_cal = False
    # skipping freqs and tints that are already in the dataframe
    if csv_exists or pkl_exists:
        cmap_f = dict(map(reversed, enumerate(freq_chans)))
        cmap_t = dict(map(reversed, enumerate(time_ints)))
        if csv_exists:
            df = pd.read_csv(out_csv, usecols=indices)
            idx_arr = df.values
        elif pkl_exists:
            df_pkl = pd.read_pickle(out_pkl)
            idx_arr = df_pkl.index.values
        done = [(cmap_f[f], cmap_t[t]) for (f, t) in idx_arr if (f in freq_chans \
        and t in time_ints)]
        iter_dims = [idim for idim in iter_dims if idim not in done]
        if not any(iter_dims):
            print('Solutions to all specified frequency channels and time '\
                  'integrations already exist in {}\n'.format(out_pkl))
            skip_cal = True

    if not skip_cal:
        stdout = io.StringIO()
        with redirect_stdout(stdout):  # suppress output
            grp = XDgroup_data(args.jd_time, JDs, args.pol, chans=freq_chans, \
                            tints=time_ints, use_flags=args.flag_type, \
                            noise=args.noise)
        if not args.noise:
            _, RedG, cData = grp
            noisec = None
        else:
            _, RedG, cData, cNData = grp

        flags = cData.mask
        cData = cData.data

        # to get fields for the csv header
        ants = numpy.unique(RedG[:, 1:])
        no_ants = ants.size
        no_unq_bls = numpy.unique(RedG[:, 0]).size
        cRedG = relabelAnts(RedG)
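        # one complex gain per antenna per JD plus one complex visibility per
        # unique baseline; the factor of 2 counts real and imaginary parts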
        psize = (no_ants * JDs.size + no_unq_bls) * 2

        # discarding 'jac', 'hess_inv', 'nfev', 'njev'
        slct_keys = ['success', 'status', 'message', 'fun', 'nit', 'x']
        header = slct_keys[:-1] + list(numpy.arange(psize)) + indices

        # remove flagged channels from iter_dims
        # if all flags are identical, the mask may collapse to a scalar; promote
        # it to a full boolean array so the axis reductions below still work
        if isinstance(flags, numpy.bool_):
            flags = numpy.full(cData.shape, flags)
        if flags.any():
            if args.chan_flag_pct is None:
                flg_chans = numpy.unique(
                    numpy.where(flags.all(axis=(0, 2, 3)))[0])
                print('Flagged channels across all days are: {}\n'.\
                      format(freq_chans[flg_chans]))
            else:
                flg_pct = args.chan_flag_pct / 100
                flg_chans = numpy.unique(numpy.where(flags.all(axis=3).mean(axis=(0, 2)) \
                                                     > flg_pct)[0])
                print('Flagged channels across all days and those that are '\
                      'more than {}% flagged for their given day/time slice are: {}\n'.\
                      format(args.chan_flag_pct, freq_chans[flg_chans]))
            iter_dims = [
                idim for idim in iter_dims if idim[0] not in flg_chans
            ]
            if not iter_dims:  # check if slices to solve are empty
                print('All specified channels are flagged. Exiting.')
                sys.exit()

        def cal(credg, distribution, no_unq_bls, no_ants, obsvis, noise,
                initp):
            """Relative redundant calibration across days with doRelCalD:
            default implementation with unconstrained minimizer using cartesian
            coordinates
            """
            res_rel, initp_new = doRelCalD(credg, obsvis, no_unq_bls, no_ants, \
                distribution=distribution, noise=noise, initp=initp, \
                return_initp=True, xd=True)
            res_rel = {key: res_rel[key] for key in slct_keys}
            # use solution for next solve in iteration
            if res_rel['success']:
                initp = initp_new
            return res_rel, initp

        RelCal = functools.partial(cal, cRedG, args.dist, no_unq_bls, no_ants)

        with redirect_stdout(stdout):  # suppress output
            with open(out_csv, 'a') as f:  # write / append to csv file
                writer = DictWriter(f, fieldnames=header)
                if not csv_exists:
                    writer.writeheader()
                initp = None
                for i, iter_dim in enumerate(iter_dims):
                    if args.noise:
                        noisec = cNData[:, iter_dim[0], iter_dim[1], :]
                    res_rel, initp = RelCal(cData[:, iter_dim[0], iter_dim[1], :], \
                                            noisec, initp)
                    # expanding out the solution
                    for j, param in enumerate(res_rel['x']):
                        res_rel[j] = param
                    # reset initp after each frequency slice
                    if not (i + 1) % no_tints:
                        initp = None
                    del res_rel['x']
                    res_rel.update({indices[0]:freq_chans[iter_dim[0]], \
                                    indices[1]:time_ints[iter_dim[1]]})
                    writer.writerow(res_rel)

        print('Relative calibration results saved to csv file {}'.format(
            out_csv))
        df = pd.read_csv(out_csv)
        if csv_exists:
            freqs = df['freq'].unique()
            tints = df['time_int'].unique()
            if cData.shape[0] != freqs.size or cData.shape[1] != tints.size:
                _, _, cData = XDgroup_data(args.jd_time, JDs, args.pol, chans=freqs,
                                           tints=tints, use_flags=args.flag_type, \
                                           noise=None)
                cData = cData.data
        df.set_index(indices, inplace=True)
        # we now append the residuals as additional columns
        df = append_residuals_rel(df, cData, cRedG, 'cartesian', out_fn=None)
        if pkl_exists and not csv_exists:
            df = pd.concat([df, df_pkl])
        df.sort_values(by=indices, inplace=True)
        if args.compression is not None:
            out_pkl += '.{}'.format(args.compression)
            print('{} compression used in pickling the dataframe'.format(
                args.compression))
        df.to_pickle(out_pkl, compression=args.compression)
        print('Relative calibration results dataframe pickled to {}'.format(
            out_pkl))

        # creating metadata file
        out_md = default_fn.rsplit('.', 1)[0] + '.md.pkl'
        if not os.path.exists(out_md):
            md = {'no_ants':no_ants, 'no_unq_bls':no_unq_bls, 'redg':RedG, \
                  'antpos':hd.antpos, 'last':hd.lsts, 'Nfreqs':hd.Nfreqs, \
                  'Ntimes':hd.Ntimes, 'JDs':JDs}
            with open(out_md, 'wb') as f:
                pickle.dump(md, f, protocol=pickle.HIGHEST_PROTOCOL)
            print(
                'Relative calibration metadata pickled to {}\n'.format(out_md))

    print('Script run time: {}'.format(datetime.datetime.now() - startTime))
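
The snippet ends inside the module body; the conventional entry-point guard (assumed, since it is not shown above) would be:

if __name__ == '__main__':
    main()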