def _load_a_common_dataset(self, name):
     ### load a common dataset from the first file
     if name == 'channo' and not self._channel_select is None:
         self.create_dataset(name, data=self._channel_select)
         memh5.copyattrs(self.infiles[0][name].attrs, self[name].attrs)
     else:
         super(RawTimestream, self)._load_a_common_dataset(name)
 def _copy_a_common_dataset(self, name, other):
     ### copy a common dataset from `other` to self
     if name == 'channo' and not other._subset_channel_select is None:
         self.create_dataset(name, data=other._subset_channel_select)
         memh5.copyattrs(other[name].attrs, self[name].attrs)
     else:
         super(RawTimestream, self)._copy_a_common_dataset(name, other)
Example #4
 def _load_a_special_common_dataset(self, name, axis_name):
     ### load a common dataset that needs special care
     ### this dataset needs to be distributed along axis_name if axis_name
     ### is the current self.main_data_dist_axis
     dset = self.infiles[0][name]
     axis = self.main_data_axes.index(axis_name)
     tmp = np.arange(dset.shape[0])
     sel = tmp[self.main_data_select[axis]].tolist()
     data = dset[sel]
     # if axis_name is just the distributed axis, load dataset distributed
     if axis == self.main_data_dist_axis:
          data = mpiarray.MPIArray.from_numpy_array(data, axis=self.main_axes_ordered_datasets[name].index(axis))
     self.create_dataset(name, data=data)
     # copy attrs of this dset
     memh5.copyattrs(dset.attrs, self[name].attrs)
Example #6
    def stokes2lin(self):
        """Convert the Stokes polarized data to linear polarization."""
        try:
            pol = self.pol
        except KeyError:
            raise RuntimeError('Polarization of the data is unknown, cannot convert')

        if pol.attrs['pol_type'] == 'linear' and pol.shape[0] == 4:
            warnings.warn('Data is already linear polarization, no need to convert')
            return

        if pol.attrs['pol_type'] == 'stokes' and pol.shape[0] == 4:

            # redistribute to 0 axis if polarization is the distributed axis
            original_dist_axis = self.main_data_dist_axis
            if 'polarization' == self.main_data_axes[self.main_data_dist_axis]:
                self.redistribute(0)

            pol = pol[:].tolist()
            p = self.pol_dict

            # create a new MPIArray to hold the new data
            md = mpiarray.MPIArray(self.main_data.shape, axis=self.main_data_dist_axis, comm=self.comm, dtype=self.main_data.dtype)
            # convert to linear xx, yy, xy, yx
            md.local_array[:, :, 0] = self.main_data.local_data[:, :, pol.index(p['I'])] + self.main_data.local_data[:, :, pol.index(p['Q'])] # xx
            md.local_array[:, :, 1] = self.main_data.local_data[:, :, pol.index(p['I'])] - self.main_data.local_data[:, :, pol.index(p['Q'])] # yy
            md.local_array[:, :, 2] = self.main_data.local_data[:, :, pol.index(p['U'])] + 1.0J * self.main_data.local_data[:, :, pol.index(p['V'])] # xy
            md.local_array[:, :, 3] = self.main_data.local_data[:, :, pol.index(p['U'])] - 1.0J * self.main_data.local_data[:, :, pol.index(p['V'])] # yx

            attr_dict = {} # temporarily save attrs of this dataset
            memh5.copyattrs(self.main_data.attrs, attr_dict)
            del self[self.main_data_name]
            # create main data
            self.create_dataset(self.main_data_name, shape=md.shape, dtype=md.dtype, data=md, distributed=True, distributed_axis=self.main_data_dist_axis)
            memh5.copyattrs(attr_dict, self.main_data.attrs)

            del self['pol']
            self.create_dataset('pol', data=np.array([p['xx'], p['yy'], p['xy'], p['yx']]), dtype='i4')
            self['pol'].attrs['pol_type'] = 'linear'

            # redistribute self to original axis
            self.redistribute(original_dist_axis)

        else:
            raise RuntimeError('Cannot convert to linear polarization')
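The conversion above applies the linear-feed relations xx = I + Q, yy = I - Q, xy = U + 1j*V, yx = U - 1j*V. A minimal standalone sketch (plain NumPy, made-up values, no container machinery) showing the mapping and its inverse under the same convention:

import numpy as np

# Made-up Stokes visibilities for a single (time, frequency) sample.
I, Q, U, V = 1.0 + 0.0j, 0.3 + 0.0j, 0.1 + 0.0j, 0.05 + 0.0j

xx = I + Q
yy = I - Q
xy = U + 1.0J * V
yx = U - 1.0J * V

# Recovering the Stokes parameters under the same convention.
assert np.isclose(0.5 * (xx + yy), I)
assert np.isclose(0.5 * (xx - yy), Q)
assert np.isclose(0.5 * (xy + yx), U)
assert np.isclose(-0.5J * (xy - yx), V)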
Example #7
    def stokes2lin(self):
        """Convert the Stokes polarized data to linear polarization."""
        try:
            pol = self.pol
        except KeyError:
            raise RuntimeError('Polarization of the data is unknown, cannot convert')

        if pol.attrs['pol_type'] == 'linear' and pol.shape[0] == 4:
            warnings.warn('Data is already linear polarization, no need to convert')
            return

        if pol.attrs['pol_type'] == 'stokes' and pol.shape[0] == 4:
            pol = pol[:].tolist()

            # redistribute to 0 axis if polarization is the distributed axis
            original_dist_axis = self.main_data_dist_axis
            if 'polarization' == self.main_data_axes[self.main_data_dist_axis]:
                self.redistribute(0)

            # create a new MPIArray to hold the new data
            md = mpiarray.MPIArray(self.main_data.shape, axis=self.main_data_dist_axis, comm=self.comm, dtype=self.main_data.dtype)
            # convert to linear xx, yy, xy, yx
            md.local_array[:, :, 0] = self.main_data.local_data[:, :, pol.index('I')] + self.main_data.local_data[:, :, pol.index('Q')] # xx
            md.local_array[:, :, 1] = self.main_data.local_data[:, :, pol.index('I')] - self.main_data.local_data[:, :, pol.index('Q')] # yy
            md.local_array[:, :, 2] = self.main_data.local_data[:, :, pol.index('U')] + 1.0J * self.main_data.local_data[:, :, pol.index('V')] # xy
            md.local_array[:, :, 3] = self.main_data.local_data[:, :, pol.index('U')] - 1.0J * self.main_data.local_data[:, :, pol.index('V')] # yx

            attr_dict = {} # temporarily save attrs of this dataset
            memh5.copyattrs(self.main_data.attrs, attr_dict)
            del self[self.main_data_name]
            # create main data
            self.create_dataset(self.main_data_name, shape=md.shape, dtype=md.dtype, data=md, distributed=True, distributed_axis=self.main_data_dist_axis)
            memh5.copyattrs(attr_dict, self.main_data.attrs)

            del self['pol']
            self.create_dataset('pol', data=np.array(['xx', 'yy', 'xy', 'yx']))
            self['pol'].attrs['pol_type'] = 'linear'

            # redistribute self to original axis
            self.redistribute(original_dist_axis)

        else:
            raise RuntimeError('Cannot convert to linear polarization')
Example #8
 def _load_a_common_dataset(self, name):
     ### load a common dataset from the first file
     if name in self.freq_ordered_datasets.keys():
         self._load_a_special_common_dataset(name, 'frequency')
     elif name in self.bl_ordered_datasets.keys():
         self._load_a_special_common_dataset(name, 'baseline')
     elif name == 'feedno' and not self._feed_select is None:
         self.create_dataset(name, data=self._feed_select)
         memh5.copyattrs(self.infiles[0][name].attrs, self[name].attrs)
     elif name in self.feed_ordered_datasets.keys() and not self._feed_select is None:
         fh = self.infiles[0]
         feedno = fh['feedno'][:].tolist()
         feed_inds = [ feedno.index(fd) for fd in self._feed_select ]
         feed_axis = self.feed_ordered_datasets[name].index(0)
         slc = [slice(0, None)] * (feed_axis + 1)
         slc[feed_axis] = feed_inds
         self.create_dataset(name, data=fh[name][tuple(slc)])
         memh5.copyattrs(self.infiles[0][name].attrs, self[name].attrs)
     else:
         super(TimestreamCommon, self)._load_a_common_dataset(name)
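In the feed-ordered branch above, the selected feeds are picked out along the dataset's feed axis, whichever position that axis occupies, by building an index tuple that slices every leading axis and fancy-indexes the feed axis. A standalone illustration of that slicing pattern with a made-up array and hypothetical feed indices:

import numpy as np

arr = np.arange(24).reshape(2, 4, 3)   # pretend axis 1 is the feed-ordered axis
feed_inds = [0, 2]                     # indices of the feeds to keep
feed_axis = 1

slc = [slice(0, None)] * (feed_axis + 1)
slc[feed_axis] = feed_inds
selected = arr[tuple(slc)]             # shape (2, 2, 3): only feeds 0 and 2 remain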
Example #10
    def __init__(self, *args, **kwargs):

        # Pull out the values of needed arguments
        axes_from = kwargs.pop('axes_from', None)
        attrs_from = kwargs.pop('attrs_from', None)
        dist = kwargs.pop('distributed', True)
        comm = kwargs.pop('comm', None)

        # Run base initialiser
        memh5.BasicCont.__init__(self, distributed=dist, comm=comm)

        # Check to see if this call looks like it was called like
        # memh5.MemDiskGroup would have been. If it is, we're probably trying to
        # create a bare container, so don't initialise any datasets. This
        # behaviour is needed to support tod.concatenate
        if len(args) or 'data_group' in kwargs:
            return

        # Create axis entries
        for axis in self._axes:

            axis_map = None

            # Check if axis is specified in initialiser
            if axis in kwargs:

                # If axis is an integer, turn into an arange as a default definition
                if isinstance(kwargs[axis], int):
                    axis_map = np.arange(kwargs[axis])
                else:
                    axis_map = kwargs[axis]

            # If not set in the arguments copy from another object if set
            elif axes_from is not None and axis in axes_from.index_map:
                axis_map = axes_from.index_map[axis]

            # Set the index_map[axis] if we have a definition, otherwise throw an error
            if axis_map is not None:
                self.create_index_map(axis, axis_map)
            else:
                raise RuntimeError('No definition of axis %s supplied.' % axis)

        # Iterate over datasets and initialise any that specify it
        for name, spec in self._dataset_spec.items():
            if 'initialise' in spec and spec['initialise']:
                self.add_dataset(name)

        # Copy over attributes
        if attrs_from is not None:

            # Copy attributes from container root
            memh5.copyattrs(attrs_from.attrs, self.attrs)

            # Copy attributes over from any common datasets
            for name in self._dataset_spec.keys():
                if name in self.datasets and name in attrs_from.datasets:
                    memh5.copyattrs(attrs_from.datasets[name].attrs,
                                    self.datasets[name].attrs)

            # Make sure that the __memh5_subclass attribute is accurate
            clspath = self.__class__.__module__ + '.' + self.__class__.__name__
            clsattr = self.attrs.get('__memh5_subclass', None)
            if clsattr and (clsattr != clspath):
                self.attrs['__memh5_subclass'] = clspath
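The axis handling in the initialiser above reduces to a small rule: an integer keyword argument becomes a default np.arange map, an explicit array is used as-is, and otherwise the axis is copied from axes_from.index_map. A standalone restatement of that rule (the helper name resolve_axis is hypothetical, not part of the original code):

import numpy as np

def resolve_axis(axis, kwargs, axes_from=None):
    # Integer -> default arange map; array-like -> used directly.
    if axis in kwargs:
        return np.arange(kwargs[axis]) if isinstance(kwargs[axis], int) else kwargs[axis]
    # Fall back to another container's index_map, if supplied.
    if axes_from is not None and axis in axes_from.index_map:
        return axes_from.index_map[axis]
    raise RuntimeError('No definition of axis %s supplied.' % axis)

print(resolve_axis('freq', {'freq': 4}))                          # [0 1 2 3]
print(resolve_axis('time', {'time': np.array([0.0, 0.5, 1.0])}))  # explicit map used as-is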
Example #11
def make_empty_corrdata(
    freq=None,
    input=None,
    time=None,
    axes_from=None,
    attrs_from=None,
    distributed=True,
    distributed_axis=0,
    comm=None,
):
    """Make an empty CorrData (i.e. timestream) container.

    Parameters
    ----------
    freq : np.ndarray, optional
        Frequency map to use.
    input : np.ndarray, optional
        Input map.
    time : np.ndarray, optional
        Time map.
    axes_from : BasicCont, optional
        Another container to copy any unspecified axes from.
    attrs_from : BasicCont, optional
        Another container to copy any unspecified attributes from.
    distributed : boolean, optional
        Whether to create the container in distributed mode.
    distributed_axis : int, optional
        Axis to distribute over.
    comm : MPI.Comm, optional
        MPI communicator to distribute over.

    Returns
    -------
    data : andata.CorrData
    """

    # Setup frequency axis
    if freq is None:
        if axes_from is not None and "freq" in axes_from.index_map:
            freq = axes_from.index_map["freq"]
        else:
            raise RuntimeError("No frequency axis defined.")

    # Setup input axis
    if input is None:
        if axes_from is not None and "input" in axes_from.index_map:
            input = axes_from.index_map["input"]
        else:
            raise RuntimeError("No input axis defined.")

    # Setup time axis
    if time is None:
        if axes_from is not None and "time" in axes_from.index_map:
            time = axes_from.index_map["time"]
        else:
            raise RuntimeError("No time axis defined.")

    # Create CorrData object and setup axes
    from ch_util import andata

    # Initialise distributed container
    data = andata.CorrData.__new__(andata.CorrData)
    memh5.BasicCont.__init__(data, distributed=True, comm=comm)

    # Copy over attributes
    if attrs_from is not None:
        memh5.copyattrs(attrs_from.attrs, data.attrs)

    # Create index map
    data.create_index_map("freq", freq)
    data.create_index_map("input", input)
    data.create_index_map("time", time)

    # Construct and create product map
    if axes_from is not None and "prod" in axes_from.index_map:
        prodmap = axes_from.index_map["prod"]
    else:
        nfeed = len(input)
        prodmap = np.array([[fi, fj] for fi in range(nfeed)
                            for fj in range(fi, nfeed)])
    data.create_index_map("prod", prodmap)

    # Construct and create stack map
    if axes_from is not None and "stack" in axes_from.index_map:
        stackmap = axes_from.index_map["stack"]
        vis_shape = (data.nfreq, len(stackmap), data.ntime)
        vis_axis = np.array(["freq", "stack", "time"])
    else:
        stackmap = np.empty_like(prodmap,
                                 dtype=[("prod", "<u4"), ("conjugate", "u1")])
        stackmap["prod"][:] = np.arange(len(prodmap))
        stackmap["conjugate"] = 0
        vis_shape = (data.nfreq, data.nprod, data.ntime)
        vis_axis = np.array(["freq", "prod", "time"])
    data.create_index_map("stack", stackmap)

    # Construct and create reverse map stack
    if axes_from is not None and "stack" in axes_from.reverse_map:
        reverse_map_stack = axes_from.reverse_map["stack"]
        data.create_reverse_map("stack", reverse_map_stack)

    # Determine datatype for weights
    if ((axes_from is not None) and hasattr(axes_from, "flags")
            and ("vis_weight" in axes_from.flags)):
        weight_dtype = axes_from.flags["vis_weight"].dtype
    else:
        weight_dtype = np.float32

    # Create empty datasets, and add axis attributes to them
    dset = data.create_dataset(
        "vis",
        shape=vis_shape,
        dtype=np.complex64,
        distributed=distributed,
        distributed_axis=distributed_axis,
    )
    dset.attrs["axis"] = vis_axis
    dset[:] = 0.0

    dset = data.create_flag(
        "vis_weight",
        shape=vis_shape,
        dtype=weight_dtype,
        distributed=distributed,
        distributed_axis=distributed_axis,
    )
    dset.attrs["axis"] = vis_axis
    dset[:] = 0.0

    dset = data.create_flag(
        "inputs",
        shape=(data.ninput, data.ntime),
        dtype=np.float32,
        distributed=False,
        distributed_axis=None,
    )
    dset.attrs["axis"] = np.array(["input", "time"])
    dset[:] = 0.0

    dset = data.create_dataset(
        "gain",
        shape=(data.nfreq, data.ninput, data.ntime),
        dtype=np.complex64,
        distributed=distributed,
        distributed_axis=distributed_axis,
    )
    dset.attrs["axis"] = np.array(["freq", "input", "time"])
    dset[:] = 0.0

    return data
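When no prod axis can be copied from axes_from, the function builds the default product map by enumerating every unique feed pair in upper-triangle order. A standalone illustration for a hypothetical 3-input telescope:

import numpy as np

nfeed = 3
prodmap = np.array([[fi, fj] for fi in range(nfeed) for fj in range(fi, nfeed)])
# prodmap -> [[0 0] [0 1] [0 2] [1 1] [1 2] [2 2]]; nprod = nfeed * (nfeed + 1) // 2 = 6

# In practice the simplest call copies everything from an existing container,
# e.g. make_empty_corrdata(axes_from=existing, attrs_from=existing), where
# `existing` is an already-loaded andata.CorrData object (hypothetical here).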
Example #12
    def __init__(self, *args, **kwargs):

        # Pull out the values of needed arguments
        axes_from = kwargs.pop("axes_from", None)
        attrs_from = kwargs.pop("attrs_from", None)
        dist = kwargs.pop("distributed", True)
        comm = kwargs.pop("comm", None)
        self.allow_chunked = kwargs.pop("allow_chunked", False)

        # Run base initialiser
        memh5.BasicCont.__init__(self, distributed=dist, comm=comm)

        # Check to see if this call looks like it was called like
        # memh5.MemDiskGroup would have been. If it is, we're probably trying to
        # create a bare container, so don't initialise any datasets. This
        # behaviour is needed to support tod.concatenate
        if len(args) or "data_group" in kwargs:
            return

        # Create axis entries
        for axis in self.axes:

            axis_map = None

            # Check if axis is specified in initialiser
            if axis in kwargs:

                # If axis is an integer, turn into an arange as a default definition
                if isinstance(kwargs[axis], int):
                    axis_map = np.arange(kwargs[axis])
                else:
                    axis_map = kwargs[axis]

            # If not set in the arguments copy from another object if set
            elif axes_from is not None and axis in axes_from.index_map:
                axis_map = axes_from.index_map[axis]

            # Set the index_map[axis] if we have a definition, otherwise throw an error
            if axis_map is not None:
                self.create_index_map(axis, axis_map)
            else:
                raise RuntimeError("No definition of axis %s supplied." % axis)

        reverse_map_stack = None
        # Create reverse map
        if "reverse_map_stack" in kwargs:
            # If axis is an integer, turn into an arange as a default definition
            if isinstance(kwargs["reverse_map_stack"], int):
                reverse_map_stack = np.arange(kwargs["reverse_map_stack"])
            else:
                reverse_map_stack = kwargs["reverse_map_stack"]

        # If not set in the arguments copy from another object if set
        elif axes_from is not None and "stack" in axes_from.reverse_map:
            reverse_map_stack = axes_from.reverse_map["stack"]

        # Set the reverse_map['stack'] if we have a definition,
        # otherwise do NOT throw an error, errors are thrown in
        # classes that actually need a reverse stack
        if reverse_map_stack is not None:
            self.create_reverse_map("stack", reverse_map_stack)

        # Iterate over datasets and initialise any that specify it
        for name, spec in self.dataset_spec.items():
            if "initialise" in spec and spec["initialise"]:
                self.add_dataset(name)

        # Copy over attributes
        if attrs_from is not None:

            # Copy attributes from container root
            memh5.copyattrs(attrs_from.attrs, self.attrs)

            # Copy attributes over from any common datasets
            for name in self.dataset_spec.keys():
                if name in self.datasets and name in attrs_from.datasets:
                    memh5.copyattrs(
                        attrs_from.datasets[name].attrs, self.datasets[name].attrs
                    )

            # Make sure that the __memh5_subclass attribute is accurate
            clspath = self.__class__.__module__ + "." + self.__class__.__name__
            clsattr = self.attrs.get("__memh5_subclass", None)
            if clsattr and (clsattr != clspath):
                self.attrs["__memh5_subclass"] = clspath
    def separate_pol_and_bl(self, keep_dist_axis=False):
        """Separate baseline axis to polarization and baseline.

        This will create and return a Timestream container holding the polarization
        and baseline separated data.

        Parameters
        ----------
        keep_dist_axis : bool, optional
            Whether to redistribute main data to the original dist axis if the
            dist axis has changed during the operation. Default False.

        """

        # if dist axis is baseline, redistribute it along time
        original_dist_axis = self.main_data_dist_axis
        if 'baseline' == self.main_data_axes[original_dist_axis]:
            keep_dist_axis = False  # can not keep dist axis in this case
            self.redistribute(0)

        # create a Timestream container to hold the pol and bl separated data
        ts = timestream.Timestream(dist_axis=self.main_data_dist_axis,
                                   comm=self.comm)

        feedno = sorted(self['feedno'][:].tolist())
        xchans = [self['channo'][feedno.index(fd)][0] for fd in feedno]
        ychans = [self['channo'][feedno.index(fd)][1] for fd in feedno]

        nfeed = len(feedno)
        xx_pairs = [(xchans[i], xchans[j]) for i in xrange(nfeed)
                    for j in xrange(i, nfeed)]
        yy_pairs = [(ychans[i], ychans[j]) for i in xrange(nfeed)
                    for j in xrange(i, nfeed)]
        xy_pairs = [(xchans[i], ychans[j]) for i in xrange(nfeed)
                    for j in xrange(i, nfeed)]
        yx_pairs = [(ychans[i], xchans[j]) for i in xrange(nfeed)
                    for j in xrange(i, nfeed)]

        blorder = [tuple(bl) for bl in self['blorder']]
        conj_blorder = [tuple(bl[::-1]) for bl in self['blorder']]

        def _get_ind(chp):
            try:
                return False, blorder.index(chp)
            except ValueError:
                return True, conj_blorder.index(chp)

        # xx
        xx_list = [_get_ind(chp) for chp in xx_pairs]
        xx_inds = [ind for (cj, ind) in xx_list]
        xx_conj = [cj for (cj, ind) in xx_list]
        # yy
        yy_list = [_get_ind(chp) for chp in yy_pairs]
        yy_inds = [ind for (cj, ind) in yy_list]
        yy_conj = [cj for (cj, ind) in yy_list]
        # xy
        xy_list = [_get_ind(chp) for chp in xy_pairs]
        xy_inds = [ind for (cj, ind) in xy_list]
        xy_conj = [cj for (cj, ind) in xy_list]
        # yx
        yx_list = [_get_ind(chp) for chp in yx_pairs]
        yx_inds = [ind for (cj, ind) in yx_list]
        yx_conj = [cj for (cj, ind) in yx_list]

        # create a MPIArray to hold the pol and bl separated vis
        rvis = self.main_data.local_data
        shp = rvis.shape[:2] + (4, len(xx_inds))
        vis = np.empty(shp, dtype=rvis.dtype)
        vis[:, :, 0] = np.where(xx_conj, rvis[:, :, xx_inds].conj(),
                                rvis[:, :, xx_inds])  # xx
        vis[:, :, 1] = np.where(yy_conj, rvis[:, :, yy_inds].conj(),
                                rvis[:, :, yy_inds])  # yy
        vis[:, :, 2] = np.where(xy_conj, rvis[:, :, xy_inds].conj(),
                                rvis[:, :, xy_inds])  # xy
        vis[:, :, 3] = np.where(yx_conj, rvis[:, :, yx_inds].conj(),
                                rvis[:, :, yx_inds])  # yx

        vis = mpiarray.MPIArray.wrap(vis,
                                     axis=self.main_data_dist_axis,
                                     comm=self.comm)

        # create main data
        ts.create_main_data(vis)
        # copy attrs from rt
        memh5.copyattrs(self.main_data.attrs, ts.main_data.attrs)
        # create attrs of this dataset
        ts.main_data.attrs[
            'dimname'] = 'Time, Frequency, Polarization, Baseline'

        # create a MPIArray to hold the pol and bl separated vis_mask
        rvis_mask = self['vis_mask'].local_data
        shp = rvis_mask.shape[:2] + (4, len(xx_inds))
        vis_mask = np.empty(shp, dtype=rvis_mask.dtype)
        vis_mask[:, :, 0] = rvis_mask[:, :, xx_inds]  # xx
        vis_mask[:, :, 1] = rvis_mask[:, :, yy_inds]  # yy
        vis_mask[:, :, 2] = rvis_mask[:, :, xy_inds]  # xy
        vis_mask[:, :, 3] = rvis_mask[:, :, yx_inds]  # yx

        vis_mask = mpiarray.MPIArray.wrap(vis_mask,
                                          axis=self.main_data_dist_axis,
                                          comm=self.comm)

        # create vis_mask
        axis_order = ts.main_axes_ordered_datasets[ts.main_data_name]
        ts.create_main_axis_ordered_dataset(axis_order, 'vis_mask', vis_mask,
                                            axis_order)

        # create other datasets needed
        # pol ordered dataset
        p = self.pol_dict
        ts.create_pol_ordered_dataset('pol',
                                      data=np.array(
                                          [p['xx'], p['yy'], p['xy'], p['yx']],
                                          dtype='i4'))
        ts['pol'].attrs['pol_type'] = 'linear'

        # bl ordered dataset
        blorder = np.array([[feedno[i], feedno[j]] for i in xrange(nfeed)
                            for j in xrange(i, nfeed)])
        ts.create_bl_ordered_dataset('blorder', data=blorder)
        # copy attrs of this dset
        memh5.copyattrs(self['blorder'].attrs, ts['blorder'].attrs)
        # other bl ordered dataset
        if len(
                set(self.bl_ordered_datasets.keys()) -
            {'vis', 'vis_mask', 'blorder', 'true_blorder', 'bl_pol'}) > 0:
            raise RuntimeError('Should not have other bl_ordered_datasets %s' %
                               (set(self.bl_ordered_datasets.keys()) -
                                {'vis', 'vis_mask', 'blorder'}))

        # copy other attrs
        for attrs_name, attrs_value in self.attrs.iteritems():
            if attrs_name not in self.time_ordered_attrs:
                ts.attrs[attrs_name] = attrs_value

        # copy other datasets
        for dset_name, dset in self.iteritems():
            if dset_name == self.main_data_name or dset_name == 'vis_mask':
                # already created above
                continue
            elif dset_name in self.main_axes_ordered_datasets.keys():
                if dset_name in self.bl_ordered_datasets.keys():
                    # already created above
                    continue
                else:
                    axis_order = self.main_axes_ordered_datasets[dset_name]
                    axis = None
                    for order in axis_order:
                        if isinstance(order, int):
                            axis = order
                    if axis is None:
                        raise RuntimeError(
                            'Invalid axis order %s for dataset %s' %
                            (axis_order, dset_name))
                    ts.create_main_axis_ordered_dataset(
                        axis, dset_name, dset.data, axis_order)
            elif dset_name in self.time_ordered_datasets.keys():
                axis_order = self.time_ordered_datasets[dset_name]
                ts.create_time_ordered_dataset(dset_name, dset.data,
                                               axis_order)
            elif dset_name in self.feed_ordered_datasets.keys():
                if dset_name == 'channo':  # channo not useful for Timestream
                    continue
                else:
                    axis_order = self.feed_ordered_datasets[dset_name]
                    ts.create_feed_ordered_dataset(dset_name, dset.data,
                                                   axis_order)
            else:
                if dset.common:
                    ts.create_dataset(dset_name, data=dset)
                elif dset.distributed:
                    ts.create_dataset(dset_name,
                                      data=dset.data,
                                      shape=dset.shape,
                                      dtype=dset.dtype,
                                      distributed=True,
                                      distributed_axis=dset.distributed_axis)

            # copy attrs of this dset
            memh5.copyattrs(dset.attrs, ts[dset_name].attrs)

        # redistribute self to original axis
        if keep_dist_axis:
            self.redistribute(original_dist_axis)

        return ts
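The _get_ind helper above resolves a requested channel pair to a baseline index, falling back to the reversed pair (and flagging a conjugation) when only that ordering appears in blorder. A toy standalone version with made-up channel numbers showing both branches:

blorder = [(1, 1), (1, 2), (2, 2)]                 # hypothetical recorded baselines
conj_blorder = [bl[::-1] for bl in blorder]

def _get_ind(chp):
    try:
        return False, blorder.index(chp)           # pair recorded as-is
    except ValueError:
        return True, conj_blorder.index(chp)       # only the conjugate pair recorded

print(_get_ind((1, 2)))   # (False, 1): use vis[..., 1] directly
print(_get_ind((2, 1)))   # (True, 1):  use vis[..., 1].conj()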
    def separate_pol_and_bl(self, keep_dist_axis=False):
        """Separate baseline axis to polarization and baseline.

        This will create and return a Timestream container holding the polarization
        and baseline separated data.

        Parameters
        ----------
        keep_dist_axis : bool, optional
            Whether to redistribute main data to the original dist axis if the
            dist axis has changed during the operation. Default False.

        """

        # if dist axis is baseline, redistribute it along time
        original_dist_axis = self.main_data_dist_axis
        if 'baseline' == self.main_data_axes[original_dist_axis]:
            keep_dist_axis = False # can not keep dist axis in this case
            self.redistribute(0)

        # create a Timestream container to hold the pol and bl separated data
        ts = timestream.Timestream(dist_axis=self.main_data_dist_axis, comm=self.comm)

        feedno = sorted(self['feedno'][:].tolist())
        xchans = [ self['channo'][feedno.index(fd)][0] for fd in feedno ]
        ychans = [ self['channo'][feedno.index(fd)][1] for fd in feedno ]

        nfeed = len(feedno)
        xx_pairs = [ (xchans[i], xchans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]
        yy_pairs = [ (ychans[i], ychans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]
        xy_pairs = [ (xchans[i], ychans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]
        yx_pairs = [ (ychans[i], xchans[j]) for i in xrange(nfeed) for j in xrange(i, nfeed) ]

        blorder = [ tuple(bl) for bl in self['blorder'] ]
        conj_blorder = [ tuple(bl[::-1]) for bl in self['blorder'] ]

        def _get_ind(chp):
            try:
                return False, blorder.index(chp)
            except ValueError:
                return True, conj_blorder.index(chp)
        # xx
        xx_list = [ _get_ind(chp) for chp in xx_pairs ]
        xx_inds = [ ind for (cj, ind) in xx_list ]
        xx_conj = [ cj for (cj, ind) in xx_list ]
        # yy
        yy_list = [ _get_ind(chp) for chp in yy_pairs ]
        yy_inds = [ ind for (cj, ind) in yy_list ]
        yy_conj = [ cj for (cj, ind) in yy_list ]
        # xy
        xy_list = [ _get_ind(chp) for chp in xy_pairs ]
        xy_inds = [ ind for (cj, ind) in xy_list ]
        xy_conj = [ cj for (cj, ind) in xy_list ]
        # yx
        yx_list = [ _get_ind(chp) for chp in yx_pairs ]
        yx_inds = [ ind for (cj, ind) in yx_list ]
        yx_conj = [ cj for (cj, ind) in yx_list ]

        # create a MPIArray to hold the pol and bl separated vis
        rvis = self.main_data.local_data
        shp = rvis.shape[:2] + (4, len(xx_inds))
        vis = np.empty(shp, dtype=rvis.dtype)
        vis[:, :, 0] = np.where(xx_conj, rvis[:, :, xx_inds].conj(), rvis[:, :, xx_inds]) # xx
        vis[:, :, 1] = np.where(yy_conj, rvis[:, :, yy_inds].conj(), rvis[:, :, yy_inds]) # yy
        vis[:, :, 2] = np.where(xy_conj, rvis[:, :, xy_inds].conj(), rvis[:, :, xy_inds]) # xy
        vis[:, :, 3] = np.where(yx_conj, rvis[:, :, yx_inds].conj(), rvis[:, :, yx_inds]) # yx

        vis = mpiarray.MPIArray.wrap(vis, axis=self.main_data_dist_axis, comm=self.comm)

        # create main data
        ts.create_main_data(vis)
        # copy attrs from rt
        memh5.copyattrs(self.main_data.attrs, ts.main_data.attrs)
        # create attrs of this dataset
        ts.main_data.attrs['dimname'] = 'Time, Frequency, Polarization, Baseline'

        # create a MPIArray to hold the pol and bl separated vis_mask
        rvis_mask = self['vis_mask'].local_data
        shp = rvis_mask.shape[:2] + (4, len(xx_inds))
        vis_mask = np.empty(shp, dtype=rvis_mask.dtype)
        vis_mask[:, :, 0] = rvis_mask[:, :, xx_inds] # xx
        vis_mask[:, :, 1] = rvis_mask[:, :, yy_inds] # yy
        vis_mask[:, :, 2] = rvis_mask[:, :, xy_inds] # xy
        vis_mask[:, :, 3] = rvis_mask[:, :, yx_inds] # yx

        vis_mask = mpiarray.MPIArray.wrap(vis_mask, axis=self.main_data_dist_axis, comm=self.comm)

        # create vis_mask
        axis_order = ts.main_axes_ordered_datasets[ts.main_data_name]
        ts.create_main_axis_ordered_dataset(axis_order, 'vis_mask', vis_mask, axis_order)

        # create other datasets needed
        # pol ordered dataset
        ts.create_pol_ordered_dataset('pol', data=np.array(['xx', 'yy', 'xy', 'yx']))
        ts['pol'].attrs['pol_type'] = 'linear'

        # bl ordered dataset
        blorder = np.array([ [feedno[i], feedno[j]] for i in xrange(nfeed) for j in xrange(i, nfeed) ])
        ts.create_bl_ordered_dataset('blorder', data=blorder)
        # copy attrs of this dset
        memh5.copyattrs(self['blorder'].attrs, ts['blorder'].attrs)
        # other bl ordered dataset
        if len(set(self.bl_ordered_datasets.keys()) - {'vis', 'vis_mask', 'blorder'}) > 0:
            raise RuntimeError('Should not have other bl_ordered_datasets %s' % (set(self.bl_ordered_datasets.keys()) - {'vis', 'vis_mask', 'blorder'}))

        # copy other attrs
        for attrs_name, attrs_value in self.attrs.iteritems():
            if attrs_name not in self.time_ordered_attrs:
                ts.attrs[attrs_name] = attrs_value

        # copy other datasets
        for dset_name, dset in self.iteritems():
            if dset_name == self.main_data_name or dset_name == 'vis_mask':
                # already created above
                continue
            elif dset_name in self.main_axes_ordered_datasets.keys():
                if dset_name in self.bl_ordered_datasets.keys():
                    # already created above
                    continue
                else:
                    axis_order = self.main_axes_ordered_datasets[dset_name]
                    axis = None
                    for order in axis_order:
                        if isinstance(order, int):
                            axis = order
                    if axis is None:
                        raise RuntimeError('Invalid axis order %s for dataset %s' % (axis_order, dset_name))
                    ts.create_main_axis_ordered_dataset(axis, dset_name, dset.data, axis_order)
            elif dset_name in self.time_ordered_datasets.keys():
                axis_order = self.time_ordered_datasets[dset_name]
                ts.create_time_ordered_dataset(dset_name, dset.data, axis_order)
            elif dset_name in self.feed_ordered_datasets.keys():
                if dset_name == 'channo': # channo not useful for Timestream
                    continue
                else:
                    axis_order = self.feed_ordered_datasets[dset_name]
                    ts.create_feed_ordered_dataset(dset_name, dset.data, axis_order)
            else:
                if dset.common:
                    ts.create_dataset(dset_name, data=dset)
                elif dset.distributed:
                    ts.create_dataset(dset_name, data=dset.data, shape=dset.shape, dtype=dset.dtype, distributed=True, distributed_axis=dset.distributed_axis)

            # copy attrs of this dset
            memh5.copyattrs(dset.attrs, ts[dset_name].attrs)

        # redistribute self to original axis
        if keep_dist_axis:
            self.redistribute(original_dist_axis)

        return ts
Example #15
def process_gated_data(data, only_off=False):
    """
    Processes fast gating data and turns it into gated form.

    Parameters
    ----------
    data : andata.CorrData
        Correlator data with noise source switched synchronously with the
        integration.
    only_off : boolean
        Only return the off dataset.  Do not return gated datasets.

    Returns
    -------
    newdata : andata.CorrData
        Correlator data folded on the noise source.

    Comments
    --------
    For now the correlator only supports fast gating with one gate
    (gated_vis1) and 50% duty cycle. The vis dataset contains on+off
    and the gated_vis1 contains on-off. This function returns a new
    andata object with vis containing the off data only and gated_vis1
    as in the original andata object. The attribute
    'gpu.gpu_intergration_period' is divided by 2 since during an
    integration half of the frames have on data.
    """
    # Make sure we're distributed over something other than time
    data.redistribute("freq")

    # Get distribution parameters
    dist = isinstance(data.vis, memh5.MemDatasetDistributed)
    comm = data.vis.comm

    # Construct new CorrData object for gated dataset
    newdata = andata.CorrData.__new__(andata.CorrData)
    if dist:
        memh5.BasicCont.__init__(newdata, distributed=dist, comm=comm)
    else:
        memh5.BasicCont.__init__(newdata, distributed=dist)
    memh5.copyattrs(data.attrs, newdata.attrs)

    # Add index maps to newdata
    newdata.create_index_map("freq", data.index_map["freq"])
    newdata.create_index_map("prod", data.index_map["prod"])
    newdata.create_index_map("input", data.input)
    newdata.create_index_map("time", data.index_map["time"])

    # Add datasets (for noise OFF) to newdata
    # Extract the noise source off data
    vis_off = 0.5 * (
        data.vis[:].view(np.ndarray) - data["gated_vis1"][:].view(np.ndarray)
    )

    # Turn vis_off into MPIArray if we are distributed
    if dist:
        vis_off = mpiarray.MPIArray.wrap(vis_off, axis=0, comm=comm)

    # Add new visibility dataset
    vis_dset = newdata.create_dataset("vis", data=vis_off, distributed=dist)
    memh5.copyattrs(data.vis.attrs, vis_dset.attrs)

    # Add gain dataset (if exists) for vis_off.
    # These will be the gains for both the noise on ON and OFF data
    if "gain" in data:
        gain = data.gain[:].view(np.ndarray)
        # Turn gain into MPIArray if we are distributed
        if dist:
            gain = mpiarray.MPIArray.wrap(gain, axis=0, comm=comm)

        gain_dset = newdata.create_dataset("gain", data=gain, distributed=dist)
        memh5.copyattrs(data.gain.attrs, gain_dset.attrs)

    # Pull out weight dataset if it exists.
    # These will be the weights for both the noise on ON and OFF data
    if "vis_weight" in data.flags:
        vis_weight = data.weight[:].view(np.ndarray)
        # Turn vis_weight into MPIArray if we are distributed
        if dist:
            vis_weight = mpiarray.MPIArray.wrap(vis_weight, axis=0, comm=comm)

        vis_weight_dset = newdata.create_flag(
            "vis_weight", data=vis_weight, distributed=dist
        )
        memh5.copyattrs(data.weight.attrs, vis_weight_dset.attrs)

    # Add gated dataset (only gated_vis1 currently supported by correlator
    # with 50% duty cycle)
    if not only_off:
        gated_vis1 = data["gated_vis1"][:].view(np.ndarray)
        # Turn gated_vis1 into MPIArray if we are distributed
        if dist:
            gated_vis1 = mpiarray.MPIArray.wrap(gated_vis1, axis=0, comm=comm)

        gate_dset = newdata.create_dataset(
            "gated_vis1", data=gated_vis1, distributed=dist
        )
        memh5.copyattrs(data["gated_vis1"].attrs, gate_dset.attrs)

    # The CHIME pipeline uses gpu.gpu_intergration_period to estimate the integration period
    # for both the on and off gates. That number has to be changed (divided by 2) since
    # with fast gating one integration period has 1/2 of data for the on gate and 1/2
    # for the off gate
    newdata.attrs["gpu.gpu_intergration_period"] = (
        data.attrs["gpu.gpu_intergration_period"] // 2
    )

    return newdata
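The extraction of the noise-off data above rests on the algebra described in the Comments: the correlator stores on+off in vis and on-off in gated_vis1. A tiny standalone check with made-up numbers:

on, off = 3.0 + 1.0j, 2.0 - 0.5j     # hypothetical on-gate and off-gate visibilities

vis = on + off                       # what the correlator writes to 'vis'
gated_vis1 = on - off                # what it writes to 'gated_vis1'

vis_off = 0.5 * (vis - gated_vis1)
assert vis_off == off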
Example #16
def process_synced_data(data, ni_params=None, only_off=False):
    """Turn a synced noise source observation into gated form.

    This will decimate the visibility to only the noise source off bins, and
    will add 1 or more gated on-off dataset according to the specification in
    doclib:5.

    Parameters
    ----------
    data : andata.CorrData
        Correlator data with noise source switched synchronously with the
        integration.
    ni_params : dict
        Dictionary with the noise injection parameters. Optional
        for data after ctime=1435349183. ni_params has the following keys
        - ni_period: Noise injection period in GPU integrations.
        It is assumed to be the same for all the enabled noise sources
        - ni_on_bins: A list of lists, one per enabled noise source,
        with the corresponding ON gates (within a period). For each
        noise source, the list contains the indices of the time frames
        for which the source is ON.
        Example: for a noise period of 3 GPU integrations (3 gates: 0, 1, 2), two enabled
        noise sources, one ON during gate 0, the other ON during gate 1,
        and both OFF during gate 2, then
        ```
        ni_params = {'ni_period':3, 'ni_on_bins':[[0], [1]]}
        ```
    only_off : boolean
        Only return the off dataset.  Do not return gated datasets.

    Returns
    -------
    newdata : andata.CorrData
        Correlator data folded on the noise source.

    Comments
    --------
    - The function assumes that the fpga frame counter, which is used to
    determine the noise injection gating parameters, is unwrapped.
    - For noise injection data before ctime=1435349183 (i.e. for noise
    injection data before 20150626T200540Z_pathfinder_corr) the noise
    injection information is not in the headers so this function cannot be
    used to determine the noise injection parameters. A different method is
    required. Although it is recommended to check the data directly in this
    case, the previous version of this function assumed that
    ni_params = {'ni_period':2, 'ni_on_bins':[[0],]}
    for noise injection data before ctime=1435349183. Although this is not
    always true, it is true for big old datasets like pass1g.
    Use the value of ni_params recommended above to reproduce the
    results of the old function with the main old datasets.
    - Data (visibility, gain and weight datasets) are averaged for all the
    off gates within the noise source period, and also for all the on
    gates of each noise source.
    - For the time index map, only one timestamp per noise period is kept
    (no averaging)
    """

    if ni_params is None:
        # ctime before which the noise injection information is not in the
        # headers so this function cannot be used to determine the noise
        # injection parameters.
        ctime_no_noise_inj_data = 1435349183
        if data.index_map["time"]["ctime"][0] > ctime_no_noise_inj_data:
            # All the data required to figure out the noise inj gating is in
            # the data header
            try:
                ni_params = _find_ni_params(data)
            except ValueError:
                warn_str = (
                    "There are no enabled noise sources for these data. "
                    "Returning input"
                )
                warnings.warn(warn_str)
                return data
        else:
            # This is data before ctime = 1435349183. Noise injection
            # parameters are not in the data header. Raise error
            t = datetime.datetime.utcfromtimestamp(ctime_no_noise_inj_data)
            t_str = t.strftime("%Y %b %d %H:%M:%S UTC")
            err_str = (
                "ni_params parameter is required for data before "
                "%s (ctime=%i)." % (t_str, ctime_no_noise_inj_data)
            )
            raise Exception(err_str)

    if len([s for s in data.datasets.keys() if "gated_vis" in s]):
        # If there are datasets with gated_vis in their names then assume
        # this is fast gating data, where the vis dataset has on+off and
        # the vis_gatedxx has onxx-off. Process separately since in
        # this case the noise injection parameters are not in gpu
        # integration frames but in fpga frames and the gates are already
        # separated
        newdata = process_gated_data(data, only_off=only_off)
    else:
        # time bins with noise ON for each source (within a noise period)
        # This is a list of lists, each list corresponding to the ON time bins
        # for each noise source.
        ni_on_bins = ni_params["ni_on_bins"]

        # Number of enabled noise sources
        N_ni_sources = len(ni_on_bins)

        # Noise injection period (assume all sources have same period)
        ni_period = ni_params["ni_period"]

        # time bins with all noise sources off (within a noise period)
        ni_off_bins = np.delete(list(range(ni_period)), np.concatenate(ni_on_bins))

        # Find largest number of exact noise injection periods
        nt = ni_period * (data.ntime // ni_period)

        # Make sure we're distributed over something other than time
        data.redistribute("freq")

        # Get distribution parameters
        dist = isinstance(data.vis, memh5.MemDatasetDistributed)
        comm = data.vis.comm

        # Construct new CorrData object for gated dataset
        newdata = andata.CorrData.__new__(andata.CorrData)
        if dist:
            memh5.BasicCont.__init__(newdata, distributed=dist, comm=comm)
        else:
            memh5.BasicCont.__init__(newdata, distributed=dist)
        memh5.copyattrs(data.attrs, newdata.attrs)

        # Add index maps to newdata
        newdata.create_index_map("freq", data.index_map["freq"])
        newdata.create_index_map("prod", data.index_map["prod"])
        newdata.create_index_map("input", data.input)
        # Extract timestamps for OFF bins. Only one timestamp per noise period is
        # kept. These will be the timestamps for both the noise on ON and OFF data
        time = data.index_map["time"][ni_off_bins[0] : nt : ni_period]
        folding_period = time["ctime"][1] - time["ctime"][0]
        folding_start = time["ctime"][0]
        # Add index map for noise OFF timestamps.
        newdata.create_index_map("time", time)

        # Add datasets (for noise OFF) to newdata
        # Extract the noise source off data
        if len(ni_off_bins) > 1:
            # Average all time bins with noise OFF within a period
            vis_sky = [data.vis[..., gate:nt:ni_period] for gate in ni_off_bins]
            vis_sky = np.mean(vis_sky, axis=0)
        else:
            vis_sky = data.vis[..., ni_off_bins[0] : nt : ni_period]

        # Turn vis_sky into MPIArray if we are distributed
        if dist:
            vis_sky = mpiarray.MPIArray.wrap(vis_sky, axis=0, comm=comm)

        # Add new visibility dataset
        vis_dset = newdata.create_dataset("vis", data=vis_sky, distributed=dist)
        memh5.copyattrs(data.vis.attrs, vis_dset.attrs)

        # Add gain dataset (if exists) for noise OFF data.
        # Gain dataset also averaged (within a period)
        # These will be the gains for both the noise on ON and OFF data
        if "gain" in data:
            if len(ni_off_bins) > 1:
                gain = [data.gain[..., gate:nt:ni_period] for gate in ni_off_bins]
                gain = np.mean(gain, axis=0)
            else:
                gain = data.gain[..., ni_off_bins[0] : nt : ni_period]

            # Turn gain into MPIArray if we are distributed
            if dist:
                gain = mpiarray.MPIArray.wrap(gain, axis=0, comm=comm)

            # Add new gain dataset
            gain_dset = newdata.create_dataset("gain", data=gain, distributed=dist)
            memh5.copyattrs(data.gain.attrs, gain_dset.attrs)

        # Pull out weight dataset if it exists.
        # vis_weight dataset also averaged (within a period)
        # These will be the weights for both the noise on ON and OFF data
        if "vis_weight" in data.flags:
            if len(ni_off_bins) > 1:
                vis_weight = [
                    data.weight[..., gate:nt:ni_period] for gate in ni_off_bins
                ]
                vis_weight = np.mean(vis_weight, axis=0)
            else:
                vis_weight = data.weight[..., ni_off_bins[0] : nt : ni_period]

            # Turn vis_weight into MPIArray if we are distributed
            if dist:
                vis_weight = mpiarray.MPIArray.wrap(vis_weight, axis=0, comm=comm)

            # Add new vis_weight dataset
            vis_weight_dset = newdata.create_flag(
                "vis_weight", data=vis_weight, distributed=dist
            )
            memh5.copyattrs(data.weight.attrs, vis_weight_dset.attrs)

        # Add gated datasets for each noise source:
        if not only_off:
            for i in range(N_ni_sources):
                # Construct the noise source only data
                vis_noise = [data.vis[..., gate:nt:ni_period] for gate in ni_on_bins[i]]
                vis_noise = np.mean(vis_noise, axis=0)  # Averaging
                vis_noise -= vis_sky  # Subtracting sky contribution

                # Turn vis_noise into MPIArray if we are distributed
                if dist:
                    vis_noise = mpiarray.MPIArray.wrap(vis_noise, axis=0, comm=comm)

                # Add noise source dataset
                gate_dset = newdata.create_dataset(
                    "gated_vis{0}".format(i + 1), data=vis_noise, distributed=dist
                )
                gate_dset.attrs["axis"] = np.array(
                    ["freq", "prod", "gated_time{0}".format(i + 1)]
                )
                gate_dset.attrs["folding_period"] = folding_period
                gate_dset.attrs["folding_start"] = folding_start

                # Construct array of gate weights (sum = 0)
                gw = np.zeros(ni_period, dtype=np.float64)
                gw[ni_off_bins] = -1.0 / len(ni_off_bins)
                gw[ni_on_bins[i]] = 1.0 / len(ni_on_bins[i])
                gate_dset.attrs["gate_weight"] = gw

    return newdata
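The selection of noise-off frames in the slow-gating branch follows directly from ni_params. A standalone sketch reproducing that bookkeeping with the example values from the docstring (a 3-gate period with sources ON during gates 0 and 1, and a hypothetical ntime of 10):

import numpy as np

ni_params = {'ni_period': 3, 'ni_on_bins': [[0], [1]]}
ni_period = ni_params['ni_period']
ni_on_bins = ni_params['ni_on_bins']

# Gates with every noise source off (within one period).
ni_off_bins = np.delete(list(range(ni_period)), np.concatenate(ni_on_bins))
# ni_off_bins -> array([2])

ntime = 10
nt = ni_period * (ntime // ni_period)              # largest whole number of periods
off_frames = np.arange(ntime)[ni_off_bins[0]:nt:ni_period]
# off_frames -> array([2, 5, 8]): time frames kept for the noise-off visibility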