def __init__(self, parameter_file_or_dict=None, feedback=2):
    super(OneAndOne, self).__init__(parameter_file_or_dict, feedback)

    self.input_files = input_path(format_list(self.params['input_files']))
    self.output_files = output_path(format_list(self.params['output_files']), mkdir=False)
    self.iterable = self.params['iterable']
    if self.iterable:
        self.iter_start = self.params['iter_start']
        self.iter_step = self.params['iter_step']
        self.iter_num = self.params['iter_num']
        self._iter_cnt = 0  # inner iteration counter

    # Inspect the `process` method to see how many arguments it takes.
    pro_argspec = inspect.getargspec(self.process)
    n_args = len(pro_argspec.args) - 1
    if n_args > 1:
        msg = ("`process` method takes more than 1 argument, which is not"
               " allowed")
        raise PipelineConfigError(msg)
    if pro_argspec.varargs or pro_argspec.keywords or pro_argspec.defaults:
        msg = ("`process` method may not have variable length or optional"
               " arguments")
        raise PipelineConfigError(msg)
    if n_args == 0:
        self._no_input = True
    else:  # n_args == 1
        self._no_input = False
        if len(self._in) != n_args and len(self.input_files) == 0:
            msg = ("No data to iterate over. There are no 'in' keys and no"
                   " 'input_files'")
            raise PipelineConfigError(msg)
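# A minimal, standalone sketch of what the arity check above accepts and
# rejects. The `Good`/`Bad` task classes are hypothetical; it assumes Python
# 2's `inspect.getargspec` (removed in Python 3.11+), matching the code above.
import inspect

class Good(object):
    def process(self, ts):  # exactly one argument besides self: accepted
        return ts

class Bad(object):
    def process(self, ts, extra=None):  # an optional argument: rejected
        return ts

for cls in (Good, Bad):
    spec = inspect.getargspec(cls.process)
    n_args = len(spec.args) - 1  # exclude self
    ok = n_args <= 1 and not (spec.varargs or spec.keywords or spec.defaults)
    print('%s.process: n_args=%d, accepted=%s' % (cls.__name__, n_args, ok))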
def read_input(self):
    """Method for reading time ordered data input."""
    mode = self.params['mode']
    start = self.params['start']
    stop = self.params['stop']
    dist_axis = self.params['dist_axis']
    tag_input_iter = self.params['tag_input_iter']
    if self.iterable and tag_input_iter:
        input_files = input_path(self.input_files, iteration=self.iteration)
    else:
        input_files = self.input_files

    tod = self._Tod_class(input_files, mode, start, stop, dist_axis, use_hints=False)
    tod, full_data = self.data_select(tod)
    tod.load_all()

    return tod
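# For context, a rough sketch of how per-iteration input tagging might work.
# The `tag_path` helper below is a hypothetical stand-in for the actual
# `input_path(..., iteration=...)` used above; only the idea (one tagged file
# name per pipeline iteration) is taken from the code above.
def tag_path(files, iteration=None):
    """Hypothetical: tag each file name with the current iteration number."""
    if iteration is None:
        return files
    return [f.replace('.hdf5', '_iter%d.hdf5' % iteration) for f in files]

print(tag_path(['ts.hdf5'], iteration=3))  # ['ts_iter3.hdf5']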
def read_process_write(self, tod):
    """Reads input, executes any processing and writes output."""
    # Determine whether the input tod is a RawTimestream or a Timestream,
    # and set the correct _Tod_class accordingly.
    if self._no_input:
        if tod is not None:
            # This should never happen. Just here to catch bugs.
            raise RuntimeError("Somehow `input` was set.")
    else:
        # read from files
        if tod is None:
            if self.input_files is None or len(self.input_files) == 0:
                if mpiutil.rank0:
                    msg = 'No file to read from, will stop then...'
                    logger.info(msg)
                self.stop_iteration(True)
                return None

            tag_input_iter = self.params['tag_input_iter']
            if self.iterable and tag_input_iter:
                input_files = input_path(self.input_files, iteration=self.iteration)
            else:
                input_files = self.input_files

            # ensure all input files exist
            for infile in input_files:
                if not path.exists(infile):
                    if mpiutil.rank0:
                        msg = 'Missing input file %s, will stop then...' % infile
                        logger.info(msg)
                    self.stop_iteration(True)
                    return None

            # inspect the 'vis' dataset of the first input file
            with h5py.File(input_files[0], 'r') as f:
                vis_shp = f['vis'].shape
            if len(vis_shp) == 3:
                self._Tod_class = RawTimestream
            elif len(vis_shp) == 4:
                self._Tod_class = Timestream
            else:
                raise RuntimeError('Something wrong happened, dimension of vis data != 3 or 4')
        # from arg
        else:
            if isinstance(tod, RawTimestream):
                self._Tod_class = RawTimestream
            elif isinstance(tod, Timestream):
                self._Tod_class = Timestream
            else:
                raise ValueError('Invalid input %s, need either a RawTimestream or Timestream object' % tod)

            tod, full_data = self.subset_select(tod)
            if not full_data:
                tod = tod.subset(return_copy=False)

    return super(TimestreamTask, self).read_process_write(tod)
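# A standalone illustration of the dimension-based dispatch above: a 'vis'
# dataset with 3 axes is treated as a RawTimestream, one with 4 axes (a
# separate polarization axis) as a Timestream. File name and shapes are made
# up; requires h5py and NumPy.
import h5py
import numpy as np

fname = 'example_vis.hdf5'  # hypothetical file
with h5py.File(fname, 'w') as f:
    # 3 axes (time, frequency, baseline) -> would dispatch to RawTimestream
    f.create_dataset('vis', data=np.zeros((2, 4, 6), dtype=np.complex64))

with h5py.File(fname, 'r') as f:
    vis_shp = f['vis'].shape
if len(vis_shp) == 3:
    print('3 axes -> RawTimestream')
elif len(vis_shp) == 4:
    print('4 axes -> Timestream')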
def process(self, ts):
    assert isinstance(ts, Timestream), \
        '%s only works for Timestream object' % self.__class__.__name__

    gain_file = self.params['gain_file']
    tag_input_iter = self.params['tag_input_iter']
    if tag_input_iter:
        gain_file = input_path(gain_file, self.iteration)

    # read gain from file
    with h5py.File(gain_file, 'r') as f:
        gain = f['gain'][:]
        gain_src = f['gain'].attrs['calibrator']
        gain_freq = f['gain'].attrs['freq']
        gain_pol = f['gain'].attrs['pol']
        gain_feed = f['gain'].attrs['feed']

    ts.redistribute('baseline')

    feedno = ts['feedno'][:].tolist()
    pol = [ts.pol_dict[p] for p in ts['pol'][:]]  # as string
    gain_pd = {'xx': 0, 'yy': 1, 0: 'xx', 1: 'yy'}  # for gain related operations
    freq = ts.freq[:]
    nf = len(freq)

    # should check freq, pol and feed here; omit it for now...
    for fi in range(nf):
        for pi in [pol.index('xx'), pol.index('yy')]:
            pi_ = gain_pd[pol[pi]]
            for bi, (fd1, fd2) in enumerate(ts['blorder'].local_data):
                g1 = gain[fi, pi_, feedno.index(fd1)]
                g2 = gain[fi, pi_, feedno.index(fd2)]
                if np.isfinite(g1) and np.isfinite(g2):
                    ts.local_vis[:, fi, pi, bi] /= (g1 * np.conj(g2))
                else:
                    # mask the un-calibrated vis
                    ts.local_vis_mask[:, fi, pi, bi] = True

    return super(Apply, self).process(ts)
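# The loop above divides each visibility by the product of its two feed
# gains, V_ij -> V_ij / (g_i * conj(g_j)), and masks baselines whose gains
# are not finite. A small NumPy sketch of that step, with made-up gains and
# baselines (feeds indexed directly rather than via feedno.index):
import numpy as np

g = np.array([1.2 + 0.3j, 0.9 - 0.1j, np.nan + 0j])  # per-feed gains; feed 2 failed
vis = np.ones(3, dtype=np.complex64)  # one visibility per baseline
mask = np.zeros(3, dtype=bool)
blorder = [(0, 1), (0, 2), (1, 2)]  # feed pair for each baseline

for bi, (i, j) in enumerate(blorder):
    g1, g2 = g[i], g[j]
    if np.isfinite(g1) and np.isfinite(g2):
        vis[bi] /= g1 * np.conj(g2)  # calibrate
    else:
        mask[bi] = True  # gain unknown, mask this baseline

print(mask)  # [False  True  True]: baselines involving feed 2 are masked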
def read_input(self):
    input_file = input_path(self.params['input_files'])

    return memh5.MemGroup.from_hdf5(input_file, distributed=True, hints=False)
def _init_input_files(self):
    self.input_files = input_path(format_list(self.params['input_files']))
def process(self, ts):
    assert isinstance(ts, Timestream), \
        '%s only works for Timestream object' % self.__class__.__name__

    check = self.params['check']
    load_data = self.params['load_data']

    # cache to file
    if self.params['cache_to_file']:
        cache_file_name = input_path(self.params['cache_file_name'])
        if not os.path.isfile(cache_file_name):
            # write ts to cache_file_name
            cache_file_name = output_path(self.params['cache_file_name'], mkdir=True)
            ts.apply_mask(fill_val=0)  # apply mask, fill 0 to masked values
            # create weight dataset
            weight = np.logical_not(ts.local_vis_mask).astype(np.int16)  # use int16 to save memory
            weight = mpiarray.MPIArray.wrap(weight, axis=ts.main_data_dist_axis)
            axis_order = ts.main_axes_ordered_datasets[ts.main_data_name]
            ts.create_main_axis_ordered_dataset(axis_order, 'weight', weight, axis_order)
            ts.attrs['ndays'] = 1  # record number of days accumulated
            ts.to_files(cache_file_name)
            # empty ts to release memory
            ts.empty()
        else:
            # accumulate ts to cache_file_name
            ts.apply_mask(fill_val=0)  # apply mask, fill 0 to masked values
            for rk in range(ts.nproc):
                if ts.rank == rk:
                    with h5py.File(cache_file_name, 'r+') as f:
                        # may need some check in future...
                        if np.prod(ts['vis'].local_shape) > 0:  # no need to write if no local data
                            slc = []
                            for st, ln in zip(ts['vis'].local_offset, ts['vis'].local_shape):
                                slc.append(slice(st, st + ln))
                            slc = tuple(slc)
                            f['vis'][slc] += ts.local_vis  # accumulate vis
                            if 'weight' in ts.iterkeys():
                                f['weight'][slc] += ts['weight'].local_data
                            else:
                                f['weight'][slc] += np.logical_not(ts.local_vis_mask).astype(np.int16)  # update weight
                            f['vis_mask'][slc] = np.where(f['weight'][slc] != 0, False, True)  # update mask
                        if rk == 0:
                            f.attrs['ndays'] += 1
                mpiutil.barrier(ts.comm)
            # empty ts to release memory
            ts.empty()

        return cache_file_name

    # accumulate in memory
    # ts.redistribute('baseline')

    # load data from disk if load_data is set
    if (self.data is None) and (load_data is not None):
        self.data = Timestream(load_data, mode='r', start=0, stop=None,
                               dist_axis=ts.main_data_dist_axis,
                               use_hints=True, comm=ts.comm)
        self.data.load_all()

    if self.data is None:
        self.data = ts
        # self.data = ts.copy()
        self.data.apply_mask(fill_val=0)  # apply mask, fill 0 to masked values
        # create weight dataset
        weight = np.logical_not(self.data.local_vis_mask).astype(np.int16)  # use int16 to save memory
        weight = mpiarray.MPIArray.wrap(weight, axis=self.data.main_data_dist_axis)
        axis_order = self.data.main_axes_ordered_datasets[self.data.main_data_name]
        self.data.create_main_axis_ordered_dataset(axis_order, 'weight', weight, axis_order)
        self.data.attrs['ndays'] = 1  # record number of days accumulated
    else:
        # make sure they are distributed along the same axis
        ts.redistribute(self.data.main_data_dist_axis)

        # check for ra, dec
        ra_self = self.data['ra_dec'].local_data[:, 0]
        if mpiutil.rank0 and ra_self[0] > ra_self[1]:
            ra_self[0] -= 2 * np.pi
        ra_ts = ts['ra_dec'].local_data[:, 0]
        if mpiutil.rank0 and ra_ts[0] > ra_ts[1]:
            ra_ts[0] -= 2 * np.pi
        delta = 2 * np.pi / self.data['ra_dec'].shape[0]
        if not np.allclose(ra_self, ra_ts, rtol=0, atol=delta):
            print 'RA not aligned within %f for rank %d, max gap %f' % (
                delta, mpiutil.rank, np.abs(ra_ts - ra_self).max())
        # assert np.allclose(self.data['ra_dec'].local_data[:, 0], ts['ra_dec'].local_data[:, 0], rtol=0, atol=2*np.pi/self.data['ra_dec'].shape[0]), 'Can not accumulate data, RA not align.'
        # assert np.allclose(self.data['ra_dec'].local_data[:, 1], ts['ra_dec'].local_data[:, 1]), 'Can not accumulate data with different DEC.'

        # other checks if required
        if check:
            assert self.data.attrs['telescope'] == ts.attrs['telescope'], \
                'Data are observed by different telescopes %s and %s' % (
                    self.data.attrs['telescope'], ts.attrs['telescope'])
            assert np.allclose(self.data.local_freq, ts.local_freq), 'freq not aligned'
            assert len(self.data.local_pol) == len(ts.local_pol) and \
                (self.data.local_pol == ts.local_pol).all(), 'pol not aligned'
            assert np.allclose(self.data.local_bl, ts.local_bl), 'bl not aligned'

        ts.apply_mask(fill_val=0)  # apply mask, fill 0 to masked values
        self.data.local_vis[:] += ts.local_vis  # accumulate vis
        if 'weight' in ts.iterkeys():
            self.data['weight'].local_data[:] += ts['weight'].local_data
        else:
            self.data['weight'].local_data[:] += np.logical_not(ts.local_vis_mask).astype(np.int16)  # update weight
        self.data.local_vis_mask[:] = np.where(self.data['weight'].local_data != 0, False, True)  # update mask
        self.data.attrs['ndays'] += 1

    return super(Accum, self).process(self.data)
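# The in-memory accumulation above reduces to: zero out masked samples, sum
# the visibilities, count good samples in an integer weight array, and keep a
# sample masked only if its total weight is still zero. A self-contained
# NumPy sketch of that scheme (shapes and values are made up):
import numpy as np

def accumulate(vis_sum, weight, vis, vis_mask):
    """Add one day's data into the running sums, as in the code above."""
    vis_sum += np.where(vis_mask, 0, vis)  # apply_mask(fill_val=0), then accumulate
    weight += np.logical_not(vis_mask).astype(np.int16)  # count good samples
    mask = (weight == 0)  # masked only if no day ever contributed
    return vis_sum, weight, mask

vis_sum = np.zeros(4, dtype=np.complex64)
weight = np.zeros(4, dtype=np.int16)
day1 = (np.ones(4, dtype=np.complex64), np.array([False, True, False, True]))
day2 = (2 * np.ones(4, dtype=np.complex64), np.array([False, False, False, True]))
for vis, m in (day1, day2):
    vis_sum, weight, mask = accumulate(vis_sum, weight, vis, m)

print(vis_sum)  # [3.+0.j 2.+0.j 3.+0.j 0.+0.j]
print(weight)   # [2 1 2 0]
print(mask)     # [False False False  True]: sample 3 never had data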