Example #1
0
    def __init__(self, parameter_file_or_dict=None, feedback=2):
        """Initialize file lists, iteration settings and validate `process`.

        Parameters
        ----------
        parameter_file_or_dict : optional
            Forwarded unchanged to the parent initializer.
        feedback : int, optional
            Forwarded unchanged to the parent initializer.

        Raises
        ------
        PipelineConfigError
            If `process` takes more than one argument besides `self`, if it
            has variable length, optional or keyword-only arguments, or if it
            expects an input while there are neither 'in' keys nor
            'input_files' to provide one.
        """

        super(OneAndOne, self).__init__(parameter_file_or_dict, feedback)

        self.input_files = input_path(format_list(self.params['input_files']))
        self.output_files = output_path(
            format_list(self.params['output_files']), mkdir=False)

        self.iterable = self.params['iterable']
        if self.iterable:
            # The iteration settings are only read for iterable tasks.
            self.iter_start = self.params['iter_start']
            self.iter_step = self.params['iter_step']
            self.iter_num = self.params['iter_num']
        self._iter_cnt = 0 # inner iter counter

        # Inspect the `process` method to see how many arguments it takes.
        # `inspect.getargspec` was removed in Python 3.11, so prefer
        # `getfullargspec` whenever the interpreter provides it.
        get_argspec = getattr(inspect, 'getfullargspec', None)
        if get_argspec is None:
            get_argspec = inspect.getargspec
        pro_argspec = get_argspec(self.process)
        n_args = len(pro_argspec.args) - 1  # do not count `self`
        if n_args > 1:
            msg = ("`process` method takes more than 1 argument, which is not"
                   " allowed")
            raise PipelineConfigError(msg)
        # The `**` catch-all is named `varkw` by `getfullargspec` and
        # `keywords` by `getargspec`; keyword-only arguments only exist in
        # the `getfullargspec` result.
        var_keywords = (getattr(pro_argspec, 'varkw', None)
                        or getattr(pro_argspec, 'keywords', None))
        kw_only = getattr(pro_argspec, 'kwonlyargs', None)
        if (pro_argspec.varargs or var_keywords or pro_argspec.defaults
                or kw_only):
            msg = ("`process` method may not have variable length or optional"
                   " arguments")
            raise PipelineConfigError(msg)
        if n_args == 0:
            self._no_input = True
        else: # n_args == 1
            self._no_input = False

            # NOTE(review): `self._in` is not assigned in this method;
            # presumably the parent initializer sets it -- confirm.
            if len(self._in) != n_args and len(self.input_files) == 0:
                msg = ("No data to iterate over. There are no 'in' keys and no 'input_files'")
                raise PipelineConfigError(msg)
Example #2
0
    def read_input(self):
        """Build the time ordered data container from the input files.

        The container class is `self._Tod_class`; the result is passed through
        `self.data_select`, fully loaded with `load_all()` and returned.
        """

        mode = self.params['mode']
        start = self.params['start']
        stop = self.params['stop']
        dist_axis = self.params['dist_axis']
        tag_input_iter = self.params['tag_input_iter']

        # Only iterable tasks that ask for it get iteration-tagged paths.
        use_tagged_paths = self.iterable and tag_input_iter
        files = (input_path(self.input_files, iteration=self.iteration)
                 if use_tagged_paths else self.input_files)

        container = self._Tod_class(files, mode, start, stop, dist_axis,
                                    use_hints=False)

        # The second item returned by `data_select` is not needed here.
        selected, _ = self.data_select(container)
        selected.load_all()

        return selected
Example #3
0
    def read_process_write(self, tod):
        """Reads input, executes any processing and writes output.

        Before delegating to the parent implementation this selects
        `self._Tod_class` (RawTimestream or Timestream): from the number of
        dimensions of the 'vis' dataset in the first input file when `tod` is
        None, otherwise from the type of `tod` itself.

        Parameters
        ----------
        tod : RawTimestream, Timestream or None
            Data handed over by the caller; None means read from files.

        Returns
        -------
        Whatever the parent `read_process_write` returns, or None (after
        calling `self.stop_iteration(True)`) when there are no input files or
        one of them does not exist.

        Raises
        ------
        RuntimeError
            If the task takes no input but `tod` is set, or if the 'vis'
            dataset is neither 3 nor 4 dimensional.
        ValueError
            If `tod` is neither a RawTimestream nor a Timestream.
        """

        # determine if rt or ts from the input tod, and set the correct _Tod_class
        if self._no_input:
            if tod is not None:
                # This should never happen.  Just here to catch bugs.
                raise RuntimeError("Somehow `input` was set.")
        elif tod is None:
            # read from files
            if self.input_files is None or len(self.input_files) == 0:
                if mpiutil.rank0:
                    msg = 'No file to read from, will stop then...'
                    logger.info(msg)
                self.stop_iteration(True)
                return None
            tag_input_iter = self.params['tag_input_iter']
            if self.iterable and tag_input_iter:
                input_files = input_path(self.input_files,
                                         iteration=self.iteration)
            else:
                input_files = self.input_files
            # ensure all input_files exist
            for infile in input_files:
                if not path.exists(infile):
                    if mpiutil.rank0:
                        msg = 'Missing input file %s, will stop then...' % infile
                        logger.info(msg)
                    self.stop_iteration(True)
                    return None
            # see 'vis' dataset from the first input file
            with h5py.File(input_files[0], 'r') as f:
                vis_shp = f['vis'].shape
            if len(vis_shp) == 3:
                self._Tod_class = RawTimestream
            elif len(vis_shp) == 4:
                self._Tod_class = Timestream
            else:
                raise RuntimeError(
                    'Something wrong happened, dimension of vis data != 3 or 4'
                )
        else:
            # from arg
            if isinstance(tod, RawTimestream):
                self._Tod_class = RawTimestream
            elif isinstance(tod, Timestream):
                self._Tod_class = Timestream
            else:
                # Wrap `tod` in a 1-tuple: a bare tuple argument would be
                # unpacked by `%` and raise TypeError instead of ValueError.
                raise ValueError(
                    'Invaid input %s, need either a RawTimestream or Timestream object'
                    % (tod,))

            tod, full_data = self.subset_select(tod)
            if not full_data:
                tod = tod.subset(return_copy=False)

        return super(TimestreamTask, self).read_process_write(tod)
Example #4
0
    def process(self, ts):
        """Apply the gain stored in the 'gain_file' to the visibilities of `ts`.

        For every frequency, for the 'xx' and 'yy' polarizations and for every
        local baseline (fd1, fd2), the visibility is divided by
        ``g1 * conj(g2)``.  When either gain is not finite the visibility is
        masked instead.

        Parameters
        ----------
        ts : Timestream
            Data to calibrate; it is redistributed along 'baseline' and
            modified in place.

        Returns
        -------
        Whatever the parent `process` returns for `ts`.

        Raises
        ------
        ValueError
            From `list.index` if 'xx' or 'yy' is not among the polarizations
            of `ts`, or if a feed of a local baseline is not in
            ``ts['feedno']``.
        """

        assert isinstance(
            ts, Timestream
        ), '%s only works for Timestream object' % self.__class__.__name__

        gain_file = self.params['gain_file']
        tag_input_iter = self.params['tag_input_iter']
        if tag_input_iter:
            gain_file = input_path(gain_file, self.iteration)

        # read gain from file
        with h5py.File(gain_file, 'r') as f:
            gain = f['gain'][:]
            # These attributes are read but not used yet; they are what the
            # consistency check mentioned below would compare against.
            gain_src = f['gain'].attrs['calibrator']
            gain_freq = f['gain'].attrs['freq']
            gain_pol = f['gain'].attrs['pol']
            gain_feed = f['gain'].attrs['feed']

        ts.redistribute('baseline')

        feedno = ts['feedno'][:].tolist()
        pol = [ts.pol_dict[p] for p in ts['pol'][:]]  # as string
        gain_pd = {'xx': 0, 'yy': 1, 0: 'xx', 1: 'yy'}  # for gain related op
        freq = ts.freq[:]
        nf = len(freq)

        # should check freq, pol and feed here, omit it now...
        # NOTE(review): without that check the gain array is indexed with the
        # frequency and feed positions of `ts` -- confirm both files agree.

        # Everything below is independent of the frequency, so compute it
        # once instead of repeating the linear searches in the inner loop.
        # (index of the pol in `ts`, index of the same pol in `gain`)
        pol_inds = [(pol.index(p), gain_pd[p]) for p in ('xx', 'yy')]
        # positions in `feedno` of the two feeds of every local baseline
        bl_feed_inds = [(feedno.index(fd1), feedno.index(fd2))
                        for fd1, fd2 in ts['blorder'].local_data]

        for fi in range(nf):
            for pi, pi_ in pol_inds:
                for bi, (i1, i2) in enumerate(bl_feed_inds):
                    g1 = gain[fi, pi_, i1]
                    g2 = gain[fi, pi_, i2]
                    if np.isfinite(g1) and np.isfinite(g2):
                        ts.local_vis[:, fi, pi, bi] /= (g1 * np.conj(g2))
                    else:
                        # mask the un-calibrated vis
                        ts.local_vis_mask[:, fi, pi, bi] = True

        return super(Apply, self).process(ts)
Example #5
0
    def read_input(self):
        """Load the file named by the 'input_files' parameter.

        The parameter is resolved with `input_path` and read with
        `memh5.MemGroup.from_hdf5` (distributed, without hints).
        """
        raw_path = self.params['input_files']
        resolved_path = input_path(raw_path)
        load_kwargs = dict(distributed=True, hints=False)

        return memh5.MemGroup.from_hdf5(resolved_path, **load_kwargs)
Example #6
0
 def _init_input_files(self):
     """Resolve the 'input_files' parameter and store it in `self.input_files`."""
     # Normalize the parameter with `format_list` before resolving the paths.
     listed_files = format_list(self.params['input_files'])
     self.input_files = input_path(listed_files)
Example #7
0
    def read_input(self):
        """Read the input file into a group via `memh5.MemGroup.from_hdf5`.

        The file name comes from the 'input_files' parameter after it has
        been resolved by `input_path`.
        """
        source = input_path(self.params['input_files'])
        group = memh5.MemGroup.from_hdf5(source, distributed=True, hints=False)

        return group
Example #8
0
    def process(self, ts):
        """Accumulate `ts` onto the data gathered so far.

        Two modes are selected by the 'cache_to_file' parameter:

        * cache to file: `ts` is written to (first call) or added into
          (later calls) the file named by 'cache_file_name', then emptied.
          The method returns the cache file name and does NOT call the
          parent `process`.
        * in memory: `ts` becomes `self.data` on the first call (unless
          'load_data' names a file to start from) and is added to
          `self.data` afterwards.  The parent `process` is then called with
          `self.data`.

        In both modes the masked values are filled with 0 before summing, a
        'weight' dataset counts the unmasked samples, the mask is rebuilt
        from the weight (masked where the weight is 0) and the 'ndays'
        attribute counts the accumulated data sets.

        Parameters
        ----------
        ts : Timestream
            Data to accumulate; it is modified in place.

        Returns
        -------
        The cache file name in cache-to-file mode, otherwise whatever the
        parent `process` returns for `self.data`.
        """

        assert isinstance(
            ts, Timestream
        ), '%s only works for Timestream object' % self.__class__.__name__

        check = self.params['check']
        load_data = self.params['load_data']

        # cache to file
        if self.params['cache_to_file']:
            cache_file_name = input_path(self.params['cache_file_name'])
            if not os.path.isfile(cache_file_name):
                # write ts to cache_file_name
                cache_file_name = output_path(self.params['cache_file_name'],
                                              mkdir=True)
                self._start_accumulation(ts)

                ts.to_files(cache_file_name)
            else:
                # accumulate ts to cache_file_name
                ts.apply_mask(
                    fill_val=0)  # apply mask, fill 0 to masked values
                # The ranks update the file one after another; the barrier
                # at the end of each turn keeps them in step.
                for rk in range(ts.nproc):
                    if ts.rank == rk:
                        with h5py.File(cache_file_name, 'r+') as f:
                            # may need some check in future...
                            if np.prod(
                                    ts['vis'].local_shape
                            ) > 0:  # no need to write if no local data
                                # region of the file datasets that holds
                                # the local part of the data
                                slc = tuple(
                                    slice(st, st + ln)
                                    for st, ln in zip(ts['vis'].local_offset,
                                                      ts['vis'].local_shape))
                                f['vis'][slc] += ts.local_vis  # accumulate vis
                                if 'weight' in ts.iterkeys():
                                    f['weight'][slc] += ts['weight'].local_data
                                else:
                                    f['weight'][slc] += np.logical_not(
                                        ts.local_vis_mask).astype(
                                            np.int16)  # update weight
                                f['vis_mask'][slc] = np.where(
                                    f['weight'][slc] != 0, False,
                                    True)  # update mask
                            if rk == 0:
                                f.attrs['ndays'] += 1
                    mpiutil.barrier(ts.comm)

            # empty ts to release memory
            ts.empty()

            return cache_file_name

        # accumulate in memory
        # ts.redistribute('baseline')

        # load data from disk if load_data is set
        if (self.data is None) and (load_data is not None):
            self.data = Timestream(load_data,
                                   mode='r',
                                   start=0,
                                   stop=None,
                                   dist_axis=ts.main_data_dist_axis,
                                   use_hints=True,
                                   comm=ts.comm)
            self.data.load_all()

        if self.data is None:
            # First data set: it becomes the accumulator itself (no copy).
            self.data = ts
            self._start_accumulation(self.data)
        else:
            # make sure they are distributed along the same axis
            ts.redistribute(self.data.main_data_dist_axis)
            # check for ra, dec
            # On rank 0, a first RA larger than the second one is shifted
            # down by 2*pi before comparing.
            # NOTE(review): presumably this undoes a wrap at 0/2*pi, and the
            # in-place update may write through to the dataset -- confirm.
            ra_self = self.data['ra_dec'].local_data[:, 0]
            if mpiutil.rank0 and ra_self[0] > ra_self[1]:
                ra_self[0] -= 2 * np.pi
            ra_ts = ts['ra_dec'].local_data[:, 0]
            if mpiutil.rank0 and ra_ts[0] > ra_ts[1]:
                ra_ts[0] -= 2 * np.pi

            delta = 2 * np.pi / self.data['ra_dec'].shape[0]
            if not np.allclose(ra_self, ra_ts, rtol=0, atol=delta):
                # A misalignment is only reported, it does not abort.  The
                # parenthesised single-argument form prints the same text on
                # Python 2 and Python 3.
                print('RA not align within %f for rank %d, max gap %f' % (
                    delta, mpiutil.rank, np.abs(ra_ts - ra_self).max()))
            # other checks if required
            if check:
                assert self.data.attrs['telescope'] == ts.attrs[
                    'telescope'], 'Data are observed by different telescopes %s and %s' % (
                        self.data.attrs['telescope'], ts.attrs['telescope'])
                assert np.allclose(self.data.local_freq,
                                   ts.local_freq), 'freq not align'
                assert len(self.data.local_pol) == len(
                    ts.local_pol) and (self.data.local_pol
                                       == ts.local_pol).all(), 'pol not align'
                assert np.allclose(self.data.local_bl,
                                   ts.local_bl), 'bl not align'

            ts.apply_mask(fill_val=0)  # apply mask, fill 0 to masked values
            self.data.local_vis[:] += ts.local_vis  # accumulate vis
            if 'weight' in ts.iterkeys():
                self.data['weight'].local_data[:] += ts['weight'].local_data
            else:
                self.data['weight'].local_data[:] += np.logical_not(
                    ts.local_vis_mask).astype(np.int16)  # update weight
            self.data.local_vis_mask[:] = np.where(
                self.data['weight'].local_data != 0, False,
                True)  # update mask
            self.data.attrs['ndays'] += 1

        return super(Accum, self).process(self.data)

    def _start_accumulation(self, data):
        """Prepare `data` to be the first data set of an accumulation.

        Fills the masked values with 0, attaches a 'weight' dataset that is 1
        where the data are not masked and 0 elsewhere, and sets the 'ndays'
        attribute to 1.
        """
        data.apply_mask(fill_val=0)  # apply mask, fill 0 to masked values
        # create weight dataset
        weight = np.logical_not(data.local_vis_mask).astype(
            np.int16)  # use int16 to save memory
        weight = mpiarray.MPIArray.wrap(weight, axis=data.main_data_dist_axis)
        axis_order = data.main_axes_ordered_datasets[data.main_data_name]
        data.create_main_axis_ordered_dataset(axis_order, 'weight', weight,
                                              axis_order)
        data.attrs['ndays'] = 1  # record number of days accumulated