def __setitem__(self, key, body):

    if key not in self:

        if ephemeris._is_skyfield_obj(body):
            pass
        elif isinstance(body, (tuple, list)) and (len(body) == 2):
            ra, dec = body
            body = ephemeris.skyfield_star_from_ra_dec(ra, dec, bd_name=key)
        else:
            raise ValueError("Item must be a skyfield object or a tuple (ra, dec).")

        #window = self._nsigma * cal_utils.guess_fwhm(400.0, pol='X', dec=body.dec.radians, sigma=True)
        window = self._nsigma * get_window(400.0, pol='X', dec=body.dec.radians, deg=True)

        self._entries[key] = NameSpace()
        self._entries[key].body = body
        self._entries[key].window = window
        self._entries[key].files = {}
        self._entries[key].file_span = {}
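# A minimal usage sketch for the method above (assumes it lives on a
# TransitTracker-style container with `_nsigma` and `_entries` attributes, as
# the body suggests; `tracker` and the source names here are hypothetical):
#
#   tracker['CYG_A'] = FluxCatalog['CYG_A'].skyfield   # pre-built skyfield body
#   tracker['J0521+1638'] = (80.7, 16.6)               # (ra, dec) in degrees
#   tracker['oops'] = 42.0                             # raises ValueError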
def main(config_file=None, logging_params=DEFAULT_LOGGING):

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    # Set config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Set niceness
    current_niceness = os.nice(0)
    os.nice(config.niceness - current_niceness)
    mlog.info('Changing process niceness from %d to %d.  Confirm: %d' %
              (current_niceness, config.niceness, os.nice(0)))

    # Find acquisition files
    acq_files = sorted(glob(os.path.join(config.data_dir, config.acq, "*.h5")))
    nfiles = len(acq_files)

    # Determine time range of each file
    findex = []
    tindex = []
    for ii, filename in enumerate(acq_files):
        subdata = andata.CorrData.from_acq_h5(filename, datasets=())
        findex += [ii] * subdata.ntime
        tindex += range(subdata.ntime)

    findex = np.array(findex)
    tindex = np.array(tindex)

    # Determine transits within these files
    transits = []
    data = andata.CorrData.from_acq_h5(acq_files, datasets=())

    solar_rise = ephemeris.solar_rising(data.time[0] - 24.0 * 3600.0,
                                        end_time=data.time[-1])

    for rr in solar_rise:

        ss = ephemeris.solar_setting(rr)[0]

        solar_flag = np.flatnonzero((data.time >= rr) & (data.time <= ss))

        if solar_flag.size > 0:

            solar_flag = solar_flag[::config.downsample]

            tval = data.time[solar_flag]

            this_findex = findex[solar_flag]
            this_tindex = tindex[solar_flag]

            file_list, tindices = [], []
            for ii in range(nfiles):
                this_file = np.flatnonzero(this_findex == ii)
                if this_file.size > 0:
                    file_list.append(acq_files[ii])
                    tindices.append(this_tindex[this_file])

            date = ephemeris.unix_to_datetime(rr).strftime('%Y%m%dT%H%M%SZ')
            transits.append((date, tval, file_list, tindices))

    # Specify some parameters for the algorithm
    N = 2048

    noffset = len(config.offsets)

    if config.sep_pol:
        rank = 1
        cross_pol = False
        pol = np.array(['S', 'E'])
        pol_s = np.array([rr + 256 * xx for xx in range(0, 8, 2) for rr in range(256)])
        pol_e = np.array([rr + 256 * xx for xx in range(1, 8, 2) for rr in range(256)])
        prod_ss = []
        prod_ee = []
    else:
        rank = 8
        cross_pol = config.cross_pol
        pol = np.array(['all'])

    npol = pol.size

    # Create file prefix and suffix
    prefix = []
    prefix.append("gain_solutions")
    if config.output_prefix is not None:
        prefix.append(config.output_prefix)
    prefix = '_'.join(prefix)

    suffix = []
    suffix.append("pol_%s" % '_'.join(pol))
    suffix.append("niter_%d" % config.niter)
    if cross_pol:
        suffix.append("zerocross")
    else:
        suffix.append("keepcross")
    if config.normalize:
        suffix.append("normed")
    else:
        suffix.append("notnormed")
    suffix = '_'.join(suffix)

    # Loop over solar transits
    for date, timestamps, files, time_indices in transits:

        nfiles = len(files)
        mlog.info("%s (%d files)" % (date, nfiles))

        output_file = os.path.join(config.output_dir,
                                   "%s_SUN_%s_%s.pickle" % (prefix, date, suffix))
        mlog.info("Saving to: %s" % output_file)

        # Get info about this set of files
        data = andata.CorrData.from_acq_h5(files, datasets=['flags/inputs'])

        prod = data.prod
        coord = sun_coord(timestamps, deg=True)

        fstart = config.freq_start if config.freq_start is not None else 0
        fstop = config.freq_stop if config.freq_stop is not None else data.freq.size
        freq_index = range(fstart, fstop)

        freq = data.freq[freq_index]

        ntime = timestamps.size
        nfreq = freq.size

        # Determine bad inputs
        if config.bad_input_file is None or not os.path.isfile(config.bad_input_file):
            bad_input = np.flatnonzero(~np.all(data.flags['inputs'][:], axis=-1))
        else:
            with open(config.bad_input_file, 'r') as handler:
                bad_input = pickle.load(handler)

        mlog.info("%d inputs flagged as bad." % bad_input.size)

        bad_prod = np.array([ii for ii, pp in enumerate(prod)
                             if (pp[0] in bad_input) or (pp[1] in bad_input)])

        # Create arrays to hold the results
        ores = {}
        ores['date'] = date
        ores['coord'] = coord
        ores['time'] = timestamps
        ores['freq'] = freq
        ores['offsets'] = config.offsets
        ores['pol'] = pol

        ores['evalue'] = np.zeros((noffset, nfreq, ntime, N), dtype=np.float32)
        ores['resp'] = np.zeros((noffset, nfreq, ntime, N, config.neigen), dtype=np.complex64)
        ores['resp_err'] = np.zeros((noffset, nfreq, ntime, N, config.neigen), dtype=np.float32)

        # Loop over frequencies
        for ff, find in enumerate(freq_index):

            mlog.info("Freq %d of %d.  %0.2f MHz." % (ff + 1, nfreq, freq[ff]))

            cnt = 0

            # Loop over files
            for ii, (filename, tind) in enumerate(zip(files, time_indices)):

                ntind = len(tind)
                mlog.info("Processing file %s (%d time samples)" % (filename, ntind))

                # Loop over times
                for tt in tind:

                    t0 = time.time()

                    mlog.info("Time %d of %d.  %d index of current file." % (cnt + 1, ntime, tt))

                    # Load visibilities
                    with h5py.File(filename, 'r') as hf:
                        vis = hf['vis'][find, :, tt]

                    # Set bad products equal to zero
                    vis[bad_prod] = 0.0

                    # Different code if we are separating polarisations
                    if config.sep_pol:

                        if not any(prod_ss):
                            for pind, pp in enumerate(prod):
                                if (pp[0] in pol_s) and (pp[1] in pol_s):
                                    prod_ss.append(pind)
                                elif (pp[0] in pol_e) and (pp[1] in pol_e):
                                    prod_ee.append(pind)

                            prod_ss = np.array(prod_ss)
                            prod_ee = np.array(prod_ee)

                            mlog.info("Product sizes: %d, %d" % (prod_ss.size, prod_ee.size))

                        # Loop over polarisations
                        for pp, (input_pol, prod_pol) in enumerate([(pol_s, prod_ss),
                                                                    (pol_e, prod_ee)]):

                            visp = vis[prod_pol]

                            mlog.info("pol %s, visibility size: %d" % (pol[pp], visp.size))

                            # Loop over offsets
                            for oo, off in enumerate(config.offsets):

                                mlog.info("pol %s, rank %d, niter %d, offset %d, cross_pol %s, neigen %d" %
                                          (pol[pp], rank, config.niter, off, cross_pol, config.neigen))

                                ev, rr, rre = solve_gain(visp, cutoff=off, cross_pol=cross_pol,
                                                         normalize=config.normalize, rank=rank,
                                                         niter=config.niter, neigen=config.neigen)

                                ores['evalue'][oo, ff, cnt, input_pol] = ev
                                ores['resp'][oo, ff, cnt, input_pol, :] = rr
                                ores['resp_err'][oo, ff, cnt, input_pol, :] = rre

                    else:

                        # Loop over offsets
                        for oo, off in enumerate(config.offsets):

                            mlog.info("rank %d, niter %d, offset %d, cross_pol %s, neigen %d" %
                                      (rank, config.niter, off, cross_pol, config.neigen))

                            ev, rr, rre = solve_gain(vis, cutoff=off, cross_pol=cross_pol,
                                                     normalize=config.normalize, rank=rank,
                                                     niter=config.niter, neigen=config.neigen)

                            ores['evalue'][oo, ff, cnt, :] = ev
                            ores['resp'][oo, ff, cnt, :, :] = rr
                            ores['resp_err'][oo, ff, cnt, :, :] = rre

                    # Increment time counter
                    cnt += 1

                    # Print time elapsed
                    mlog.info("Took %0.1f seconds." % (time.time() - t0,))

        # Save to pickle file
        with open(output_file, 'w') as handle:
            pickle.dump(ores, handle)
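# A toy, self-contained check of the eigendecomposition idea that solve_gain
# builds on (solve_gain itself is not reproduced here): when a single bright
# source dominates, the N x N visibility matrix is approximately the rank-1
# outer product g g^H, so the dominant eigenpair recovers the complex gains up
# to an overall phase.  Synthetic data; numpy is the only import.
import numpy as np

rng = np.random.RandomState(0)
g = (1.0 + 0.1 * rng.randn(16)) * np.exp(2.0J * np.pi * rng.rand(16))
V = np.outer(g, g.conj())                    # ideal point-source visibility matrix
evals, evecs = np.linalg.eigh(V)             # eigenvalues in ascending order
resp = np.sqrt(evals[-1]) * evecs[:, -1]     # response from dominant eigenvector
resp *= np.exp(-1.0J * np.angle(resp[0]))    # reference phase to input 0
g0 = g * np.exp(-1.0J * np.angle(g[0]))      # true gains, same phase reference
assert np.allclose(resp, g0)                 # gains recovered exactly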
import os
import sys
import time
import pickle
from glob import glob

import numpy as np
import h5py

import caput.time as ctime
import skyfield.api

import log
from pychfpga import NameSpace, load_yaml_config

from ch_util import tools, ephemeris, andata
from ch_util.fluxcat import FluxCatalog

sys.path.insert(0, "/home/ssiegel/ch_pipeline/venv/src/draco")
from draco.util import _fast_tools

###################################################
# default variables
###################################################

DEFAULTS = NameSpace(load_yaml_config(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'defaults.yaml') + ':n2cal'))

LOG_FILE = os.environ.get(
    'N2CAL_LOG_FILE',
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'n2cal.log'))

DEFAULT_LOGGING = {
    'formatters': {
        'std': {
            'format': "%(asctime)s %(levelname)s %(name)s: %(message)s",
            'datefmt': "%m/%d %H:%M:%S"
        },
    },
    'handlers': {
        'stderr': {
def offline_point_source_calibration(file_list, source, inputmap=None, start=None,
                                     stop=None, physical_freq=None, tcorr=None,
                                     logging_params=DEFAULT_LOGGING, **kwargs):
    # Load config
    config = DEFAULTS.deepcopy()
    config.merge(NameSpace(kwargs))

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    mlog.info("ephemeris file: %s" % ephemeris.__file__)

    # Set the model to use
    fitter_function = utils.fit_point_source_transit
    model_function = utils.model_point_source_transit

    farg = inspect.getargspec(fitter_function)
    defaults = {key: val for key, val in zip(farg.args[-len(farg.defaults):], farg.defaults)}
    poly_deg_amp = kwargs.get('poly_deg_amp', defaults['poly_deg_amp'])
    poly_deg_phi = kwargs.get('poly_deg_phi', defaults['poly_deg_phi'])
    poly_type = kwargs.get('poly_type', defaults['poly_type'])

    param_name = (['%s_poly_amp_coeff%d' % (poly_type, cc) for cc in range(poly_deg_amp + 1)] +
                  ['%s_poly_phi_coeff%d' % (poly_type, cc) for cc in range(poly_deg_phi + 1)])

    model_kwargs = [('poly_deg_amp', poly_deg_amp),
                    ('poly_deg_phi', poly_deg_phi),
                    ('poly_type', poly_type)]
    model_name = '.'.join([getattr(model_function, key) for key in ['__module__', '__name__']])

    tval = {}

    # Set where to evaluate gain
    ha_eval_str = ['raw_transit']

    if config.multi_sample:
        ha_eval_str += ['transit', 'peak']
        ha_eval = [0.0, None]
        fitslc = slice(1, 3)

    ind_eval = ha_eval_str.index(config.evaluate_gain_at)

    # Determine dimensions
    direction = ['amp', 'phi']

    nparam = len(param_name)
    ngain = len(ha_eval_str)
    ndir = len(direction)

    # Determine frequencies
    data = andata.CorrData.from_acq_h5(file_list, datasets=(), start=start, stop=stop)
    freq = data.freq

    if physical_freq is not None:
        index_freq = np.array([np.argmin(np.abs(ff - freq)) for ff in physical_freq])
        freq_sel = utils.convert_to_slice(index_freq)
        freq = freq[index_freq]
    else:
        index_freq = np.arange(freq.size)
        freq_sel = None

    nfreq = freq.size

    # Compute flux of source
    inv_rt_flux_density = tools.invert_no_zero(np.sqrt(FluxCatalog[source].predict_flux(freq)))

    # Read in the eigenvalues for all frequencies
    data = andata.CorrData.from_acq_h5(file_list, datasets=['erms', 'eval'],
                                       freq_sel=freq_sel, start=start, stop=stop)

    # Determine source coordinates
    this_csd = np.floor(ephemeris.unix_to_csd(np.median(data.time)))
    timestamp0 = ephemeris.transit_times(FluxCatalog[source].skyfield,
                                         ephemeris.csd_to_unix(this_csd))[0]
    src_ra, src_dec = ephemeris.object_coords(FluxCatalog[source].skyfield,
                                              date=timestamp0, deg=True)

    ra = ephemeris.lsa(data.time)
    ha = ra - src_ra
    ha = ha - (ha > 180.0) * 360.0 + (ha < -180.0) * 360.0
    ha = np.radians(ha)

    itrans = np.argmin(np.abs(ha))

    window = 0.75 * np.max(np.abs(ha))

    off_source = np.abs(ha) > window

    mlog.info("CSD %d" % this_csd)
    mlog.info("Hour angle at transit (%d of %d): %0.2f deg" %
              (itrans, len(ha), np.degrees(ha[itrans])))
    mlog.info("Hour angle off source: %0.2f deg" %
              np.median(np.abs(np.degrees(ha[off_source]))))

    src_dec = np.radians(src_dec)
    lat = np.radians(ephemeris.CHIMELATITUDE)

    # Determine division of frequencies
    ninput = data.ninput
    ntime = data.ntime
    nblock_freq = int(np.ceil(nfreq / float(config.nfreq_per_block)))

    # Determine bad inputs
    eps = 10.0 * np.finfo(data['erms'].dtype).eps
    good_freq = np.flatnonzero(np.all(data['erms'][:] > eps, axis=-1))
    ind_sub_freq = good_freq[slice(0, good_freq.size, max(int(good_freq.size / 10), 1))]

    tmp_data = andata.CorrData.from_acq_h5(file_list, datasets=['evec'],
                                           freq_sel=ind_sub_freq, start=start, stop=stop)
    eps = 10.0 * np.finfo(tmp_data['evec'].dtype).eps
    bad_input = np.flatnonzero(np.all(np.abs(tmp_data['evec'][:, 0]) < eps, axis=(0, 2)))
    input_axis = tmp_data.input.copy()
    del tmp_data

    # Query layout database for correlator inputs
    if inputmap is None:
        inputmap = tools.get_correlator_inputs(
            datetime.datetime.utcfromtimestamp(data.time[itrans]),
            correlator='chime')

    inputmap = tools.reorder_correlator_inputs(input_axis, inputmap)

    tools.change_chime_location(rotation=config.telescope_rotation)

    # Determine x and y pol index
    xfeeds = np.array([idf for idf, inp in enumerate(inputmap)
                       if (idf not in bad_input) and tools.is_array_x(inp)])
    yfeeds = np.array([idf for idf, inp in enumerate(inputmap)
                       if (idf not in bad_input) and tools.is_array_y(inp)])

    nfeed = xfeeds.size + yfeeds.size

    pol = [yfeeds, xfeeds]
    polstr = ['Y', 'X']
    npol = len(pol)

    neigen = min(max(npol, config.neigen), data['eval'].shape[1])

    phase_ref = config.phase_reference_index
    phase_ref_by_pol = [pol[pp].tolist().index(phase_ref[pp]) for pp in range(npol)]

    # Calculate dynamic range
    eval0_off_source = np.median(data['eval'][:, 0, off_source], axis=-1)

    dyn = data['eval'][:, 1, :] * tools.invert_no_zero(eval0_off_source[:, np.newaxis])

    # Determine frequencies to mask
    not_rfi = np.ones((nfreq, 1), dtype=np.bool)
    if config.mask_rfi is not None:
        for frng in config.mask_rfi:
            not_rfi[:, 0] &= ((freq < frng[0]) | (freq > frng[1]))

    mlog.info("%0.1f percent of frequencies available after masking RFI." %
              (100.0 * np.sum(not_rfi, dtype=np.float32) / float(nfreq),))

    #dyn_flg = utils.contiguous_flag(dyn > config.dyn_rng_threshold, centre=itrans)
    if source in config.dyn_rng_threshold:
        dyn_rng_threshold = config.dyn_rng_threshold[source]
    else:
        dyn_rng_threshold = config.dyn_rng_threshold.default

    mlog.info("Dynamic range threshold set to %0.1f." % dyn_rng_threshold)

    dyn_flg = dyn > dyn_rng_threshold

    # Calculate fit flag
    fit_flag = np.zeros((nfreq, npol, ntime), dtype=np.bool)
    for pp in range(npol):

        mlog.info("Dynamic Range Nsample, Pol %d: %s" %
                  (pp, ','.join(["%d" % xx for xx in
                                 np.percentile(np.sum(dyn_flg, axis=-1), [25, 50, 75, 100])])))

        if config.nsigma1 is None:
            fit_flag[:, pp, :] = dyn_flg & not_rfi
        else:
            fit_window = config.nsigma1 * np.radians(
                utils.get_window(freq, pol=polstr[pp], dec=src_dec, deg=True))

            win_flg = np.abs(ha)[np.newaxis, :] <= fit_window[:, np.newaxis]

            fit_flag[:, pp, :] = (dyn_flg & win_flg & not_rfi)

    # Calculate base error
    base_err = data['erms'][:, np.newaxis, :]

    # Check for sign flips
    ref_resp = andata.CorrData.from_acq_h5(file_list, datasets=['evec'],
                                           input_sel=config.eigen_reference,
                                           freq_sel=freq_sel, start=start,
                                           stop=stop)['evec'][:, 0:neigen, 0, :]

    sign0 = 1.0 - 2.0 * (ref_resp.real < 0.0)

    # Check that we have the correct reference feed
    if np.any(np.abs(ref_resp.imag) > 0.0):
        raise ValueError("Reference feed %d is incorrect." % config.eigen_reference)

    del ref_resp

    # Save index_map
    results = {}
    results['model'] = model_name
    results['param'] = param_name
    results['freq'] = data.index_map['freq'][:]
    results['input'] = input_axis
    results['eval'] = ha_eval_str
    results['dir'] = direction

    for key, val in model_kwargs:
        results[key] = val

    # Initialize numpy arrays to hold results
    if config.return_response:
        results['response'] = np.zeros((nfreq, ninput, ntime), dtype=np.complex64)
        results['response_err'] = np.zeros((nfreq, ninput, ntime), dtype=np.float32)
        results['fit_flag'] = fit_flag
        results['ha_axis'] = ha
        results['ra'] = ra
    else:
        results['gain_eval'] = np.zeros((nfreq, ninput, ngain), dtype=np.complex64)
        results['weight_eval'] = np.zeros((nfreq, ninput, ngain), dtype=np.float32)
        results['frac_gain_err'] = np.zeros((nfreq, ninput, ngain, ndir), dtype=np.float32)

        results['parameter'] = np.zeros((nfreq, ninput, nparam), dtype=np.float32)
        results['parameter_err'] = np.zeros((nfreq, ninput, nparam), dtype=np.float32)

        results['index_eval'] = np.full((nfreq, ninput), -1, dtype=np.int8)

        results['gain'] = np.zeros((nfreq, ninput), dtype=np.complex64)
        results['weight'] = np.zeros((nfreq, ninput), dtype=np.float32)

        results['ndof'] = np.zeros((nfreq, ninput, ndir), dtype=np.float32)
        results['chisq'] = np.zeros((nfreq, ninput, ndir), dtype=np.float32)

        results['timing'] = np.zeros((nfreq, ninput), dtype=np.complex64)

    # Initialize metric-like variables
    results['runtime'] = np.zeros((nblock_freq, 2), dtype=np.float64)

    # Compute distances
    dist = tools.get_feed_positions(inputmap)
    for pp, feeds in enumerate(pol):
        dist[feeds, :] -= dist[phase_ref[pp], np.newaxis, :]

    # Loop over frequency blocks
    for gg in range(nblock_freq):

        mlog.info("Frequency block %d of %d." % (gg, nblock_freq))

        fstart = gg * config.nfreq_per_block
        fstop = min((gg + 1) * config.nfreq_per_block, nfreq)
        findex = np.arange(fstart, fstop)
        ngroup = findex.size

        freq_sel = utils.convert_to_slice(index_freq[findex])

        timeit_start_gg = time.time()

        if config.return_response:
            gstart = start
            gstop = stop
            tslc = slice(0, ntime)
        else:
            good_times = np.flatnonzero(np.any(fit_flag[findex], axis=(0, 1)))
            if good_times.size == 0:
                continue
            gstart = int(np.min(good_times))
            gstop = int(np.max(good_times)) + 1
            tslc = slice(gstart, gstop)
            gstart += start
            gstop += start

        hag = ha[tslc]
        itrans = np.argmin(np.abs(hag))

        # Load eigenvectors.
        nudata = andata.CorrData.from_acq_h5(
            file_list, datasets=['evec', 'vis', 'flags/vis_weight'],
            apply_gain=False, freq_sel=freq_sel, start=gstart, stop=gstop)

        # Save time to load data
        results['runtime'][gg, 0] = time.time() - timeit_start_gg
        timeit_start_gg = time.time()

        mlog.info("Time to load (per frequency): %0.3f sec" %
                  (results['runtime'][gg, 0] / ngroup,))

        # Loop over polarizations
        for pp, feeds in enumerate(pol):

            # Get timing correction
            if tcorr is not None:
                tgain = tcorr.get_gain(nudata.freq, nudata.input[feeds], nudata.time)
                tgain *= tgain[:, phase_ref_by_pol[pp], np.newaxis, :].conj()

                tgain_transit = tgain[:, :, itrans].copy()
                tgain *= tgain_transit[:, :, np.newaxis].conj()

            # Create the polarization masking vector
            P = np.zeros((1, ninput, 1), dtype=np.float64)
            P[:, feeds, :] = 1.0

            # Loop over frequencies
            for gff, ff in enumerate(findex):

                flg = fit_flag[ff, pp, tslc]

                if (2 * int(np.sum(flg))) < (nparam + 1) and not config.return_response:
                    continue

                # Normalize by eigenvalue and correct for pi phase flips in process.
                resp = (nudata['evec'][gff, 0:neigen, :, :] *
                        np.sqrt(data['eval'][ff, 0:neigen, np.newaxis, tslc]) *
                        sign0[ff, :, np.newaxis, tslc])

                # Rotate to single-pol response
                # Move time to first axis for the matrix multiplication
                invL = tools.invert_no_zero(
                    np.rollaxis(data['eval'][ff, 0:neigen, np.newaxis, tslc], -1, 0))

                UT = np.rollaxis(resp, -1, 0)
                U = np.swapaxes(UT, -1, -2)

                mu, vp = np.linalg.eigh(np.matmul(UT.conj(), P * U))

                rsign0 = (1.0 - 2.0 * (vp[:, 0, np.newaxis, :].real < 0.0))

                resp = mu[:, np.newaxis, :] * np.matmul(U, rsign0 * vp * invL)

                # Extract feeds of this pol
                # Transpose so that time is back to last axis
                resp = resp[:, feeds, -1].T

                # Compute error on response
                dataflg = ((nudata.weight[gff, feeds, :] > 0.0) &
                           np.isfinite(nudata.weight[gff, feeds, :])).astype(np.float32)

                resp_err = (dataflg * base_err[ff, :, tslc] *
                            np.sqrt(nudata.vis[gff, feeds, :].real) *
                            tools.invert_no_zero(np.sqrt(mu[np.newaxis, :, -1])))

                # Reference to specific input
                resp *= np.exp(-1.0J * np.angle(resp[phase_ref_by_pol[pp], np.newaxis, :]))

                # Apply timing correction
                if tcorr is not None:
                    resp *= tgain[gff]
                    results['timing'][ff, feeds] = tgain_transit[gff]

                # Fringestop
                lmbda = scipy.constants.c * 1e-6 / nudata.freq[gff]

                resp *= tools.fringestop_phase(hag[np.newaxis, :], lat, src_dec,
                                               dist[feeds, 0, np.newaxis] / lmbda,
                                               dist[feeds, 1, np.newaxis] / lmbda)

                # Normalize by source flux
                resp *= inv_rt_flux_density[ff]
                resp_err *= inv_rt_flux_density[ff]

                # If requested, reference phase to the median value
                if config.med_phase_ref:
                    phi0 = np.angle(resp[:, itrans, np.newaxis])
                    resp *= np.exp(-1.0J * phi0)
                    resp *= np.exp(-1.0J * np.median(np.angle(resp), axis=0, keepdims=True))
                    resp *= np.exp(1.0J * phi0)

                # Check if return_response flag was set by user
                if not config.return_response:

                    if config.multi_sample:
                        moving_window = config.nsigma2 and config.nsigma2 * np.radians(
                            utils.get_window(nudata.freq[gff], pol=polstr[pp],
                                             dec=src_dec, deg=True))

                    # Loop over inputs
                    for pii, ii in enumerate(feeds):

                        is_good = flg & (np.abs(resp[pii, :]) > 0.0) & (resp_err[pii, :] > 0.0)

                        # Set the initial gains based on raw response at transit
                        if is_good[itrans]:
                            results['gain_eval'][ff, ii, 0] = tools.invert_no_zero(resp[pii, itrans])
                            results['frac_gain_err'][ff, ii, 0, :] = (
                                resp_err[pii, itrans] *
                                tools.invert_no_zero(np.abs(resp[pii, itrans])))
                            results['weight_eval'][ff, ii, 0] = 0.5 * (
                                np.abs(resp[pii, itrans])**2 *
                                tools.invert_no_zero(resp_err[pii, itrans]))**2

                            results['index_eval'][ff, ii] = 0
                            results['gain'][ff, ii] = results['gain_eval'][ff, ii, 0]
                            results['weight'][ff, ii] = results['weight_eval'][ff, ii, 0]

                        # Exit if not performing multi time sample fit
                        if not config.multi_sample:
                            continue

                        if (2 * int(np.sum(is_good))) < (nparam + 1):
                            continue

                        try:
                            param, param_err, gain, gain_err, ndof, chisq, tval = fitter_function(
                                hag[is_good], resp[pii, is_good], resp_err[pii, is_good],
                                ha_eval, window=moving_window, tval=tval, **config.fit)
                        except Exception as rex:
                            if config.verbose:
                                mlog.info("Frequency %0.2f, Feed %d failed with error: %s" %
                                          (nudata.freq[gff], ii, rex))
                            continue

                        # Check for nan
                        wfit = (np.abs(gain) * tools.invert_no_zero(np.abs(gain_err)))**2
                        if np.any(~np.isfinite(np.abs(gain))) or np.any(~np.isfinite(wfit)):
                            continue

                        # Save to results using the convention that you should
                        # *multiply* the visibilities by the gains
                        results['gain_eval'][ff, ii, fitslc] = tools.invert_no_zero(gain)
                        results['frac_gain_err'][ff, ii, fitslc, 0] = gain_err.real
                        results['frac_gain_err'][ff, ii, fitslc, 1] = gain_err.imag
                        results['weight_eval'][ff, ii, fitslc] = wfit

                        results['parameter'][ff, ii, :] = param
                        results['parameter_err'][ff, ii, :] = param_err

                        results['ndof'][ff, ii, :] = ndof
                        results['chisq'][ff, ii, :] = chisq

                        # Check if the fit was successful and update the gain
                        # evaluation index appropriately
                        if np.all((chisq / ndof.astype(np.float32)) <=
                                  config.chisq_per_dof_threshold):
                            results['index_eval'][ff, ii] = ind_eval
                            results['gain'][ff, ii] = results['gain_eval'][ff, ii, ind_eval]
                            results['weight'][ff, ii] = results['weight_eval'][ff, ii, ind_eval]

                else:
                    # Return response only (do not fit model)
                    results['response'][ff, feeds, :] = resp
                    results['response_err'][ff, feeds, :] = resp_err

        # Save time to fit data
        results['runtime'][gg, 1] = time.time() - timeit_start_gg

        mlog.info("Time to fit (per frequency): %0.3f sec" %
                  (results['runtime'][gg, 1] / ngroup,))

        # Clean up
        del nudata
        gc.collect()

    # Print total run time
    mlog.info("TOTAL TIME TO LOAD: %0.3f min" % (np.sum(results['runtime'][:, 0]) / 60.0,))
    mlog.info("TOTAL TIME TO FIT:  %0.3f min" % (np.sum(results['runtime'][:, 1]) / 60.0,))

    # Set the best estimate of the gain
    if not config.return_response:

        flag = results['index_eval'] >= 0
        gain = results['gain']

        # Compute amplitude
        amp = np.abs(gain)

        # Hard cutoffs on the amplitude
        med_amp = np.median(amp[flag])
        min_amp = med_amp * config.min_amp_scale_factor
        max_amp = med_amp * config.max_amp_scale_factor

        flag &= ((amp >= min_amp) & (amp <= max_amp))

        # Flag outliers in amplitude for each frequency
        for pp, feeds in enumerate(pol):

            med_amp_by_pol = np.zeros(nfreq, dtype=np.float32)
            sig_amp_by_pol = np.zeros(nfreq, dtype=np.float32)

            for ff in range(nfreq):

                this_flag = flag[ff, feeds]

                if np.any(this_flag):
                    med, slow, shigh = utils.estimate_directional_scale(
                        amp[ff, feeds[this_flag]])
                    lower = med - config.nsigma_outlier * slow
                    upper = med + config.nsigma_outlier * shigh

                    flag[ff, feeds] &= ((amp[ff, feeds] >= lower) &
                                        (amp[ff, feeds] <= upper))

                    med_amp_by_pol[ff] = med
                    sig_amp_by_pol[ff] = 0.5 * (shigh - slow) / np.sqrt(
                        np.sum(this_flag, dtype=np.float32))

            if config.nsigma_med_outlier:

                med_flag = med_amp_by_pol > 0.0

                not_outlier = flag_outliers(med_amp_by_pol, med_flag,
                                            window=config.window_med_outlier,
                                            nsigma=config.nsigma_med_outlier)
                flag[:, feeds] &= not_outlier[:, np.newaxis]

                mlog.info("Pol %s: %d frequencies are outliers." %
                          (polstr[pp], np.sum(~not_outlier & med_flag, dtype=np.int)))

        # Determine bad frequencies
        flag_freq = (np.sum(flag, axis=1, dtype=np.float32) /
                     float(ninput)) > config.threshold_good_freq
        good_freq = np.flatnonzero(flag_freq)

        # Determine bad inputs
        fraction_good = np.sum(flag[good_freq, :], axis=0,
                               dtype=np.float32) / float(good_freq.size)
        flag_input = fraction_good > config.threshold_good_input

        # Finalize flag
        flag &= (flag_freq[:, np.newaxis] & flag_input[np.newaxis, :])

        # Interpolate gains
        interp_gain, interp_weight = interpolate_gain(
            freq, gain, results['weight'], flag=flag,
            length_scale=config.interpolation_length_scale, mlog=mlog)

        # Save gains to object
        results['flag'] = flag
        results['gain'] = interp_gain
        results['weight'] = interp_weight

    # Return results
    return results
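# A small self-contained check of the hour-angle wrap used in the routine
# above: the branchless expression maps any RA difference in degrees into
# [-180, 180).  Purely a sketch; numpy only.
#
#   import numpy as np
#   ha = np.array([-350.0, -181.0, -10.0, 170.0, 190.0, 359.0])
#   ha = ha - (ha > 180.0) * 360.0 + (ha < -180.0) * 360.0
#   # -> [ 10., 179., -10., 170., -170.,  -1.]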
import os
import gc
import time
import inspect
import datetime

import numpy as np
import scipy.constants

from sklearn.gaussian_process.kernels import Matern, ConstantKernel

import log
from pychfpga import NameSpace, load_yaml_config
from calibration import utils

from ch_util import andata, tools, ephemeris, timing
from ch_util.fluxcat import FluxCatalog

###################################################
# default variables
###################################################

DEFAULTS = NameSpace(load_yaml_config(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 'defaults.yaml') + ':point_source.analysis'))

LOG_FILE = os.environ.get(
    'CALIBRATION_LOG_FILE',
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'offline_cal.log'))

DEFAULT_LOGGING = {
    'formatters': {
        'std': {
            'format': "%(asctime)s %(levelname)s %(name)s: %(message)s",
            'datefmt': "%m/%d %H:%M:%S"
        },
    },
    'handlers': {
        'stderr': {
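# The Matern/ConstantKernel imports above suggest the gain interpolation over
# frequency is done with a scikit-learn Gaussian process.  A hedged sketch of
# that pattern follows; the kernel parameters and stand-in data here are
# illustrative, not the pipeline's actual values.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, ConstantKernel

freq_good = np.linspace(400.0, 800.0, 64)[:, np.newaxis]   # MHz, flagged-good channels
gain_good = np.sin(freq_good / 40.0).ravel()               # stand-in gain amplitudes

kernel = ConstantKernel(1.0) * Matern(length_scale=30.0, nu=1.5)
gp = GaussianProcessRegressor(kernel=kernel, alpha=1e-3)
gp.fit(freq_good, gain_good)

# Predict the gain (and its uncertainty) on the full frequency axis
freq_all = np.linspace(400.0, 800.0, 256)[:, np.newaxis]
gain_interp, gain_std = gp.predict(freq_all, return_std=True)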
def main(config_file=None, logging_params=DEFAULT_LOGGING):

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    # Set config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Set niceness
    current_niceness = os.nice(0)
    os.nice(config.niceness - current_niceness)
    mlog.info('Changing process niceness from %d to %d.  Confirm: %d' %
              (current_niceness, config.niceness, os.nice(0)))

    # Find acquisition files
    acq_files = sorted(glob(os.path.join(config.data_dir, config.acq, "*.h5")))
    nfiles = len(acq_files)

    # Determine time range of each file
    findex = []
    tindex = []
    for ii, filename in enumerate(acq_files):
        subdata = andata.CorrData.from_acq_h5(filename, datasets=())
        findex += [ii] * subdata.ntime
        tindex += range(subdata.ntime)

    findex = np.array(findex)
    tindex = np.array(tindex)

    # Determine transits within these files
    transits = []
    data = andata.CorrData.from_acq_h5(acq_files, datasets=())

    solar_rise = ephemeris.solar_rising(data.time[0] - 24.0 * 3600.0,
                                        end_time=data.time[-1])

    for rr in solar_rise:

        ss = ephemeris.solar_setting(rr)[0]

        solar_flag = np.flatnonzero((data.time >= rr) & (data.time <= ss))

        if solar_flag.size > 0:

            solar_flag = solar_flag[::config.downsample]

            tval = data.time[solar_flag]

            this_findex = findex[solar_flag]
            this_tindex = tindex[solar_flag]

            file_list, tindices = [], []
            for ii in range(nfiles):
                this_file = np.flatnonzero(this_findex == ii)
                if this_file.size > 0:
                    file_list.append(acq_files[ii])
                    tindices.append(this_tindex[this_file])

            date = ephemeris.unix_to_datetime(rr).strftime('%Y%m%dT%H%M%SZ')
            transits.append((date, tval, file_list, tindices))

    # Create file prefix and suffix
    prefix = []
    prefix.append("redundant_calibration")
    if config.output_prefix is not None:
        prefix.append(config.output_prefix)
    prefix = '_'.join(prefix)

    suffix = []
    if config.include_auto:
        suffix.append("wauto")
    else:
        suffix.append("noauto")
    if config.include_intracyl:
        suffix.append("wintra")
    else:
        suffix.append("nointra")
    if config.fix_degen:
        suffix.append("fixed_degen")
    else:
        suffix.append("degen")
    suffix = '_'.join(suffix)

    # Loop over solar transits
    for date, timestamps, files, time_indices in transits:

        nfiles = len(files)
        mlog.info("%s (%d files)" % (date, nfiles))

        output_file = os.path.join(config.output_dir,
                                   "%s_SUN_%s_%s.h5" % (prefix, date, suffix))
        mlog.info("Saving to: %s" % output_file)

        # Get info about this set of files
        data = andata.CorrData.from_acq_h5(files, datasets=['flags/inputs'],
                                           apply_gain=False, renormalize=False)

        coord = sun_coord(timestamps, deg=True)

        fstart = config.freq_start if config.freq_start is not None else 0
        fstop = config.freq_stop if config.freq_stop is not None else data.freq.size
        freq_index = range(fstart, fstop)

        freq = data.freq[freq_index]

        ntime = timestamps.size
        nfreq = freq.size

        # Determine bad inputs
        if config.bad_input_file is None or not os.path.isfile(config.bad_input_file):
            bad_input = np.flatnonzero(~np.all(data.flags['inputs'][:], axis=-1))
        else:
            with open(config.bad_input_file, 'r') as handler:
                bad_input = pickle.load(handler)

        mlog.info("%d inputs flagged as bad." % bad_input.size)

        nant = data.ninput

        # Determine polarization product maps
        dbinputs = tools.get_correlator_inputs(
            ephemeris.unix_to_datetime(timestamps[0]), correlator='chime')

        dbinputs = tools.reorder_correlator_inputs(data.input, dbinputs)

        feedpos = tools.get_feed_positions(dbinputs)

        prod = defaultdict(list)
        dist = defaultdict(list)

        for pp, this_prod in enumerate(data.prod):

            aa, bb = this_prod
            inp_aa = dbinputs[aa]
            inp_bb = dbinputs[bb]

            if (aa in bad_input) or (bb in bad_input):
                continue

            if not tools.is_chime(inp_aa) or not tools.is_chime(inp_bb):
                continue

            if not config.include_intracyl and (inp_aa.cyl == inp_bb.cyl):
                continue

            if not config.include_auto and (aa == bb):
                continue

            this_dist = list(feedpos[aa, :] - feedpos[bb, :])

            if tools.is_array_x(inp_aa) and tools.is_array_x(inp_bb):
                key = 'XX'
            elif tools.is_array_y(inp_aa) and tools.is_array_y(inp_bb):
                key = 'YY'
            elif not config.include_crosspol:
                continue
            elif tools.is_array_x(inp_aa) and tools.is_array_y(inp_bb):
                key = 'XY'
            elif tools.is_array_y(inp_aa) and tools.is_array_x(inp_bb):
                key = 'YX'
            else:
                raise RuntimeError("CHIME feeds not polarized.")

            prod[key].append(pp)
            dist[key].append(this_dist)

        polstr = sorted(prod.keys())
        polcnt = 0
        pol_sky_id = []
        bmap = {}
        for key in polstr:
            prod[key] = np.array(prod[key])
            dist[key] = np.array(dist[key])

            p_bmap, p_ubaseline = generate_mapping(dist[key])
            nubase = p_ubaseline.shape[0]

            bmap[key] = p_bmap + polcnt

            if polcnt > 0:
                ubaseline = np.concatenate((ubaseline, p_ubaseline), axis=0)
                pol_sky_id += [key] * nubase
            else:
                ubaseline = p_ubaseline.copy()
                pol_sky_id = [key] * nubase

            polcnt += nubase

        mlog.info("%d unique baselines" % polcnt)
        nsky = ubaseline.shape[0]

        # Create arrays to hold the results
        ores = {}
        ores['freq'] = freq
        ores['input'] = data.input
        ores['time'] = timestamps
        ores['coord'] = coord
        ores['pol'] = np.array(pol_sky_id)
        ores['baseline'] = ubaseline

        # Create array to hold gain results
        ores['gain'] = np.zeros((nfreq, nant, ntime), dtype=np.complex)
        ores['sky'] = np.zeros((nfreq, nsky, ntime), dtype=np.complex)
        ores['err'] = np.zeros((nfreq, nant + nsky, ntime, 2), dtype=np.float)

        # Loop over polarisations
        for key in polstr:

            reverse_map = bmap[key]
            p_prod = prod[key]

            isort = np.argsort(reverse_map)

            p_prod = p_prod[isort]

            p_ant1 = data.prod['input_a'][p_prod]
            p_ant2 = data.prod['input_b'][p_prod]
            p_vismap = reverse_map[isort]

            # Find the redundant groups
            tmp = np.where(np.diff(p_vismap) != 0)[0]
            edges = np.zeros(2 + tmp.size, dtype='int')
            edges[0] = 0
            edges[1:-1] = tmp + 1
            edges[-1] = p_vismap.size

            kept_base = np.unique(p_vismap)

            # Determine the unique antennas
            kept_ants = np.unique(np.concatenate([p_ant1, p_ant2]))
            antmap = np.zeros(kept_ants.max() + 1, dtype='int') - 1

            p_nant = kept_ants.size
            for i in range(p_nant):
                antmap[kept_ants[i]] = i

            p_ant1_use = antmap[p_ant1].copy()
            p_ant2_use = antmap[p_ant2].copy()

            # Create matrix
            p_nvis = p_prod.size
            nred = edges.size - 1
            npar = p_nant + nred

            A = np.zeros((p_nvis, npar), dtype=np.float32)
            B = np.zeros((p_nvis, npar), dtype=np.float32)

            for kk in range(p_nant):
                flag_ant1 = p_ant1_use == kk
                if np.any(flag_ant1):
                    A[flag_ant1, kk] = 1.0
                    B[flag_ant1, kk] = 1.0

                flag_ant2 = p_ant2_use == kk
                if np.any(flag_ant2):
                    A[flag_ant2, kk] = 1.0
                    B[flag_ant2, kk] = -1.0

            for ee in range(nred):
                A[edges[ee]:edges[ee + 1], p_nant + ee] = 1.0
                B[edges[ee]:edges[ee + 1], p_nant + ee] = 1.0

            # Add equations to break degeneracy
            if config.fix_degen:
                A = np.concatenate((A, np.zeros((1, npar), dtype=np.float32)))
                A[-1, 0:p_nant] = 1.0

                B = np.concatenate((B, np.zeros((3, npar), dtype=np.float32)))
                B[-3, 0:p_nant] = 1.0
                B[-2, 0:p_nant] = feedpos[kept_ants, 0]
                B[-1, 0:p_nant] = feedpos[kept_ants, 1]

            # Loop over frequencies
            for ff, find in enumerate(freq_index):

                mlog.info("Freq %d of %d.  %0.2f MHz." % (ff + 1, nfreq, freq[ff]))

                cnt = 0

                # Loop over files
                for ii, (filename, tind) in enumerate(zip(files, time_indices)):

                    ntind = len(tind)
                    mlog.info("Processing file %s (%d time samples)" % (filename, ntind))

                    # Compute noise weight
                    with h5py.File(filename, 'r') as hf:
                        wnoise = np.median(hf['flags/vis_weight'][find, :, :], axis=-1)

                    # Loop over times
                    for tt in tind:

                        t0 = time.time()

                        mlog.info("Time %d of %d.  %d index of current file." %
                                  (cnt + 1, ntime, tt))

                        # Load visibilities
                        with h5py.File(filename, 'r') as hf:

                            snap = hf['vis'][find, :, tt]
                            wsnap = wnoise * ((hf['flags/vis_weight'][find, :, tt] > 0.0) &
                                              (np.abs(snap) > 0.0)).astype(np.float32)

                        # Extract relevant products for this polarization
                        snap = snap[p_prod]
                        wsnap = wsnap[p_prod]

                        # Turn into amplitude and phase, avoiding NaN
                        mask = (wsnap > 0.0)
                        amp = np.where(mask, np.log(np.abs(snap)), 0.0)
                        phi = np.where(mask, np.angle(snap), 0.0)

                        # Deal with phase wrapping
                        for aa, bb in zip(edges[:-1], edges[1:]):
                            dphi = phi[aa:bb] - np.sort(phi[aa:bb])[int((bb - aa) / 2)]
                            phi[aa:bb] += (2.0 * np.pi * (dphi < -np.pi) -
                                           2.0 * np.pi * (dphi > np.pi))

                        # Add elements to fix degeneracy
                        if config.fix_degen:
                            amp = np.concatenate((amp, np.zeros(1)))
                            phi = np.concatenate((phi, np.zeros(3)))

                        # Determine noise matrix
                        inv_diagC = wsnap * np.abs(snap)**2 * 2.0

                        if config.fix_degen:
                            inv_diagC = np.concatenate((inv_diagC, np.ones(1)))

                        # Amplitude estimate and covariance
                        amp_param_cov = np.linalg.inv(np.dot(A.T, inv_diagC[:, np.newaxis] * A))
                        amp_param = np.dot(amp_param_cov, np.dot(A.T, inv_diagC * amp))

                        # Phase estimate and covariance
                        if config.fix_degen:
                            inv_diagC = np.concatenate((inv_diagC, np.ones(2)))

                        phi_param_cov = np.linalg.inv(np.dot(B.T, inv_diagC[:, np.newaxis] * B))
                        phi_param = np.dot(phi_param_cov, np.dot(B.T, inv_diagC * phi))

                        # Save to large array
                        ores['gain'][ff, kept_ants, cnt] = np.exp(amp_param[0:p_nant] +
                                                                  1.0J * phi_param[0:p_nant])
                        ores['sky'][ff, kept_base, cnt] = np.exp(amp_param[p_nant:] +
                                                                 1.0J * phi_param[p_nant:])

                        ores['err'][ff, kept_ants, cnt, 0] = np.diag(
                            amp_param_cov[0:p_nant, 0:p_nant])
                        ores['err'][ff, nant + kept_base, cnt, 0] = np.diag(
                            amp_param_cov[p_nant:, p_nant:])
                        ores['err'][ff, kept_ants, cnt, 1] = np.diag(
                            phi_param_cov[0:p_nant, 0:p_nant])
                        ores['err'][ff, nant + kept_base, cnt, 1] = np.diag(
                            phi_param_cov[p_nant:, p_nant:])

                        # Increment time counter
                        cnt += 1

                        # Print time elapsed
                        mlog.info("Took %0.1f seconds." % (time.time() - t0,))

        # Save to hdf5 file
        with h5py.File(output_file, 'w') as handler:
            handler.attrs['date'] = date
            for key, val in ores.iteritems():
                handler.create_dataset(key, data=val)
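# A toy check of the linearized redundant-calibration solve used above: for a
# single redundant group, log |V_ij| = g_i + g_j + s, and the weighted
# least-squares estimate is (A^T C^-1 A)^-1 A^T C^-1 d, with one extra row
# (sum of gains = 0) appended to break the degeneracy.  Synthetic numbers;
# numpy only.
import numpy as np

g_true = np.array([0.05, -0.02, 0.01])        # log-amplitude gains
s_true = 1.3                                  # log-amplitude of the sky signal
pairs = [(0, 1), (0, 2), (1, 2)]              # one redundant baseline group

d = np.array([g_true[i] + g_true[j] + s_true for i, j in pairs])

A = np.zeros((len(pairs) + 1, 4))
for row, (i, j) in enumerate(pairs):
    A[row, i] = A[row, j] = 1.0               # antenna terms
    A[row, 3] = 1.0                           # common sky term
A[-1, 0:3] = 1.0                              # degeneracy-breaking row: sum(g) = 0
d = np.append(d, 0.0)

inv_diagC = np.ones(len(d))                   # uniform weights for the toy
cov = np.linalg.inv(np.dot(A.T, inv_diagC[:, np.newaxis] * A))
param = np.dot(cov, np.dot(A.T, inv_diagC * d))
# param[0:3] recovers g_true up to the constrained mean; param[3] ~ s_true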
def main(config_file=None, logging_params=DEFAULT_LOGGING):

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    # Set config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Create transit tracker
    source_list = FluxCatalog.sort() if not config.source_list else config.source_list

    cal_list = [name for name, obj in FluxCatalog.iteritems()
                if (obj.dec >= config.min_dec) and
                   (obj.predict_flux(config.freq_nominal) >= config.min_flux) and
                   (name in source_list)]

    if not cal_list:
        raise RuntimeError("No calibrators found.")

    # Sort list by flux at nominal frequency
    cal_list.sort(key=lambda name: FluxCatalog[name].predict_flux(config.freq_nominal))

    # Add to transit tracker
    transit_tracker = containers.TransitTrackerOffline(
        nsigma=config.nsigma_source, extend_night=config.extend_night)
    for name in cal_list:
        transit_tracker[name] = FluxCatalog[name].skyfield

    mlog.info("Initializing offline point source processing.")

    search_time = config.start_time or 0

    # Find all calibration files
    all_files = sorted(glob.glob(os.path.join(
        config.acq_dir, '*' + config.correlator + config.acq_suffix, '*.h5')))
    if not all_files:
        return

    # Remove files whose last modified time is before the time of the most recent update
    all_files = [ff for ff in all_files if (os.path.getmtime(ff) > search_time)]
    if not all_files:
        return

    # Remove files that are currently locked
    all_files = [ff for ff in all_files
                 if not os.path.isfile(os.path.splitext(ff)[0] + '.lock')]
    if not all_files:
        return

    # Add files to transit tracker
    for ff in all_files:
        transit_tracker.add_file(ff)

    # Extract point source transits ready for analysis
    all_transits = transit_tracker.get_transits()

    # Create dictionary to hold results
    h5_psrc_fit = {}
    inputmap = None

    # Loop over transits
    for transit in all_transits:

        src, csd, is_day, files, start, stop = transit

        # Discard any point sources with unusual csd value
        if (csd < config.min_csd) or (csd > config.max_csd):
            continue

        # Discard any point sources transiting during the day
        if is_day > config.process_daytime:
            continue

        mlog.info('Processing %s transit on CSD %d (%d files, %d time samples)' %
                  (src, csd, len(files), stop - start + 1))

        # Load inputmap
        if inputmap is None:
            if config.inputmap is None:
                inputmap = tools.get_correlator_inputs(
                    ephemeris.unix_to_datetime(ephemeris.csd_to_unix(csd)),
                    correlator=config.correlator)
            else:
                with open(config.inputmap, 'r') as handler:
                    inputmap = pickle.load(handler)

        # Grab the timing correction for this transit
        tcorr = None

        if config.apply_timing:

            if config.timing_glob is not None:

                mlog.info("Loading timing correction from extended timing solutions.")

                timing_files = sorted(glob.glob(config.timing_glob))

                if timing_files:

                    try:
                        tcorr = search_extended_timing_solutions(
                            timing_files, ephemeris.csd_to_unix(csd))
                    except Exception as e:
                        mlog.error('search_extended_timing_solutions failed with error: %s' % e)
                    else:
                        mlog.info(str(tcorr))

            if tcorr is None:

                mlog.info("Loading timing correction from chimetiming acquisitions.")

                try:
                    tcorr = timing.load_timing_correction(
                        files, start=start, stop=stop,
                        window=config.timing_window,
                        instrument=config.correlator)
                except Exception as e:
                    mlog.error('timing.load_timing_correction failed with error: %s' % e)
                    mlog.warning('No timing correction applied to %s transit on CSD %d.' %
                                 (src, csd))
                else:
                    mlog.info(str(tcorr))

        # Call the main routine to process data
        try:
            outdct = offline_cal.offline_point_source_calibration(
                files, src, start=start, stop=stop, inputmap=inputmap, tcorr=tcorr,
                logging_params=logging_params, **config.analysis.as_dict())
        except Exception as e:
            msg = 'offline_cal.offline_point_source_calibration failed with error: %s' % e
            mlog.error(msg)
            continue
            #raise RuntimeError(msg)

        # Find existing gain files for this particular point source
        if src not in h5_psrc_fit:

            output_files = find_files(config, psrc=src)
            if output_files is not None:
                output_files = output_files[-1]
                mlog.info('Writing %s transit on CSD %d to existing file %s.' %
                          (src, csd, output_files))

            h5_psrc_fit[src] = containers.PointSourceWriter(
                src,
                output_file=output_files,
                output_dir=config.output_dir,
                output_suffix=point_source_name_to_file_suffix(src),
                instrument=config.correlator,
                max_file_size=config.max_file_size,
                max_num=config.max_num_time,
                memory_size=0)

        # Associate this gain calibration to the transit time
        this_time = ephemeris.transit_times(FluxCatalog[src].skyfield,
                                            ephemeris.csd_to_unix(csd))[0]

        outdct['csd'] = csd
        outdct['is_daytime'] = is_day
        outdct['acquisition'] = os.path.basename(os.path.dirname(files[0]))

        # Write to output file
        mlog.info('Writing to disk results from %s transit on CSD %d.' % (src, csd))
        h5_psrc_fit[src].write(this_time, **outdct)

        # Dump an individual file for this point source transit
        mlog.info('Dumping to disk single file for %s transit on CSD %d.' % (src, csd))

        dump_dir = os.path.join(config.output_dir, 'point_source_gains')
        containers.mkdir(dump_dir)

        dump_file = os.path.join(dump_dir, '%s_csd_%d.h5' % (src.lower(), csd))
        h5_psrc_fit[src].dump(dump_file,
                              datasets=['csd', 'acquisition', 'is_daytime',
                                        'gain', 'weight', 'timing', 'model'])

        mlog.info('Finished analysis of %s transit on CSD %d.' % (src, csd))
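# A small usage sketch of the ephemeris calls this loop relies on (grounded in
# the calls above; the source name is illustrative).  unix_to_csd/csd_to_unix
# convert between Unix time and CHIME Sidereal Day number, and transit_times
# returns the Unix times at which a skyfield body transits.
#
#   csd = int(ephemeris.unix_to_csd(time.time()))
#   t_transit = ephemeris.transit_times(FluxCatalog['CYG_A'].skyfield,
#                                       ephemeris.csd_to_unix(csd))[0]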
def main(config_file=None, logging_params=DEFAULT_LOGGING):

    # Setup logging
    log.setup_logging(logging_params)
    mlog = log.get_logger(__name__)

    # Set config
    config = DEFAULTS.deepcopy()
    if config_file is not None:
        config.merge(NameSpace(load_yaml_config(config_file)))

    # Set niceness
    current_niceness = os.nice(0)
    os.nice(config.niceness - current_niceness)
    mlog.info('Changing process niceness from %d to %d.  Confirm: %d' %
              (current_niceness, config.niceness, os.nice(0)))

    # Create output suffix
    output_suffix = config.output_suffix if config.output_suffix is not None else "jumps"

    # Calculate the wavelet transform for the following scales
    nwin = 2 * config.max_scale + 1
    nhwin = nwin // 2

    if config.log_scale:
        mlog.info("Using log scale.")
        scale = np.logspace(np.log10(config.min_scale), np.log10(nwin),
                            num=config.num_points, dtype=np.int)
    else:
        mlog.info("Using linear scale.")
        scale = np.arange(config.min_scale, nwin, dtype=np.int)

    # Loop over acquisitions
    for acq in config.acq:

        # Find acquisition files
        all_data_files = sorted(glob(os.path.join(config.data_dir, acq, "*.h5")))
        nfiles = len(all_data_files)

        if nfiles == 0:
            continue

        mlog.info("Now processing acquisition %s (%d files)" % (acq, nfiles))

        # Determine list of feeds to examine
        dset = ['flags/inputs'] if config.use_input_flag else ()

        rdr = andata.CorrData.from_acq_h5(all_data_files, datasets=dset,
                                          apply_gain=False, renormalize=False)

        inputmap = tools.get_correlator_inputs(
            ephemeris.unix_to_datetime(rdr.time[0]), correlator='chime')

        # Extract good inputs
        if config.use_input_flag:
            ifeed = np.flatnonzero((np.sum(rdr.flags['inputs'][:], axis=-1, dtype=np.int) /
                                    float(rdr.flags['inputs'].shape[-1])) > config.input_threshold)
        else:
            ifeed = np.array([ii for ii, inp in enumerate(inputmap) if tools.is_chime(inp)])

        ninp = len(ifeed)

        mlog.info("Processing %d feeds." % ninp)

        # Create list of candidates
        cfreq, cinput, ctime, cindex = [], [], [], []
        jump_flag, jump_time, jump_auto = [], [], []
        ncandidate = 0

        # Determine number of files to process at once
        if config.max_num_file is None:
            chunk_size = nfiles
        else:
            chunk_size = min(config.max_num_file, nfiles)

        # Loop over chunks of files
        for chnk, data_files in enumerate(chunks(all_data_files, chunk_size)):

            mlog.info("Now processing chunk %d (%d files)" % (chnk, len(data_files)))

            # Determine selections along the various axes
            rdr = andata.CorrData.from_acq_h5(data_files, datasets=())

            auto_sel = np.array([ii for ii, pp in enumerate(rdr.prod) if pp[0] == pp[1]])
            auto_sel = andata._convert_to_slice(auto_sel)

            if config.time_start is None:
                ind_start = 0
            else:
                time_start = ephemeris.datetime_to_unix(datetime.datetime(*config.time_start))
                ind_start = int(np.argmin(np.abs(rdr.time - time_start)))

            if config.time_stop is None:
                ind_stop = rdr.ntime
            else:
                time_stop = ephemeris.datetime_to_unix(datetime.datetime(*config.time_stop))
                ind_stop = int(np.argmin(np.abs(rdr.time - time_stop)))

            if config.freq_physical is not None:

                if hasattr(config.freq_physical, '__iter__'):
                    freq_physical = config.freq_physical
                else:
                    freq_physical = [config.freq_physical]

                freq_sel = [np.argmin(np.abs(ff - rdr.freq)) for ff in freq_physical]
                freq_sel = andata._convert_to_slice(freq_sel)

            else:
                fstart = config.freq_start if config.freq_start is not None else 0
                fstop = config.freq_stop if config.freq_stop is not None else rdr.freq.size
                freq_sel = slice(fstart, fstop)

            # Load autocorrelations
            t0 = time.time()
            data = andata.CorrData.from_acq_h5(data_files, datasets=['vis'],
                                               start=ind_start, stop=ind_stop,
                                               freq_sel=freq_sel, prod_sel=auto_sel,
                                               apply_gain=False, renormalize=False)
            mlog.info("Took %0.1f seconds to load autocorrelations." % (time.time() - t0,))

            # If first chunk, save the frequencies that are being used
            if not chnk:
                all_freq = data.freq.copy()

            # If requested, do not consider data during the day or
            # near bright source transits
            flag_quiet = np.ones(data.ntime, dtype=np.bool)
            if config.ignore_sun:
                flag_quiet &= ~transit_flag('sun', data.time, freq=np.min(data.freq),
                                            pol='X', nsig=1.0)
            if config.only_quiet:
                flag_quiet &= ~daytime_flag(data.time)
                for ss in ["CYG_A", "CAS_A", "TAU_A", "VIR_A"]:
                    flag_quiet &= ~transit_flag(ss, data.time, freq=np.min(data.freq),
                                                pol='X', nsig=1.0)

            # Loop over frequencies
            for ff, freq in enumerate(data.freq):

                print_cnt = 0
                mlog.info("FREQ %d (%0.2f MHz)" % (ff, freq))

                auto = data.vis[ff, :, :].real

                fractional_auto = auto * tools.invert_no_zero(
                    np.median(auto, axis=-1, keepdims=True)) - 1.0

                # Loop over inputs
                for ii in ifeed:

                    print_cnt += 1
                    do_print = not (print_cnt % 100)

                    if do_print:
                        mlog.info("INPUT %d" % ii)
                        t0 = time.time()

                    signal = fractional_auto[ii, :]

                    # Perform wavelet transform
                    coef, freqs = pywt.cwt(signal, scale, config.wavelet_name)

                    if do_print:
                        mlog.info("Took %0.1f seconds to perform wavelet transform." %
                                  (time.time() - t0,))
                        t0 = time.time()

                    # Find local modulus maxima
                    flg_mod_max, mod_max = mod_max_finder(scale, coef,
                                                          threshold=config.thresh,
                                                          search_span=config.search_span)

                    if do_print:
                        mlog.info("Took %0.1f seconds to find modulus maxima." %
                                  (time.time() - t0,))
                        t0 = time.time()

                    # Find persistent modulus maxima across scales
                    candidates, cmm, pdrift, start, stop, lbl = finger_finder(
                        scale, flg_mod_max, mod_max,
                        istart=max(config.min_rise - config.min_scale, 0),
                        do_fill=False)

                    if do_print:
                        mlog.info("Took %0.1f seconds to find fingers." %
                                  (time.time() - t0,))
                        t0 = time.time()

                    if candidates is None:
                        continue

                    # Cut bad candidates
                    index_good_candidates = np.flatnonzero(
                        (scale[stop] >= config.max_scale) &
                        flag_quiet[candidates[start, np.arange(start.size)]] &
                        (pdrift <= config.psigma_max))

                    ngood = index_good_candidates.size

                    if ngood == 0:
                        continue

                    mlog.info("Input %d has %d jumps" % (ii, ngood))

                    # Add remaining candidates to list
                    ncandidate += ngood

                    cfreq += [freq] * ngood
                    cinput += [ii] * ngood

                    for igc in index_good_candidates:

                        icenter = candidates[start[igc], igc]

                        cindex.append(icenter)
                        ctime.append(data.time[icenter])

                        aa = max(0, icenter - nhwin)
                        bb = min(data.ntime, icenter + nhwin + 1)

                        ncut = bb - aa

                        temp_var = np.zeros(nwin, dtype=np.bool)
                        temp_var[0:ncut] = True
                        jump_flag.append(temp_var)

                        temp_var = np.zeros(nwin, dtype=data.time.dtype)
                        temp_var[0:ncut] = data.time[aa:bb].copy()
                        jump_time.append(temp_var)

                        temp_var = np.zeros(nwin, dtype=auto.dtype)
                        temp_var[0:ncut] = auto[ii, aa:bb].copy()
                        jump_auto.append(temp_var)

            # Garbage collect
            del data
            gc.collect()

        # If we found any jumps, write them to a file.
        if ncandidate > 0:

            output_file = os.path.join(config.output_dir,
                                       "%s_%s.h5" % (acq, output_suffix))

            mlog.info("Writing %d jumps to: %s" % (ncandidate, output_file))

            # Write to output file
            with h5py.File(output_file, 'w') as handler:

                handler.attrs['files'] = all_data_files
                handler.attrs['chan_id'] = ifeed
                handler.attrs['freq'] = all_freq

                index_map = handler.create_group('index_map')
                index_map.create_dataset('jump', data=np.arange(ncandidate))
                index_map.create_dataset('window', data=np.arange(nwin))

                ax = np.array(['jump'])

                dset = handler.create_dataset('freq', data=np.array(cfreq))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('input', data=np.array(cinput))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('time', data=np.array(ctime))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('time_index', data=np.array(cindex))
                dset.attrs['axis'] = ax

                ax = np.array(['jump', 'window'])

                dset = handler.create_dataset('jump_flag', data=np.array(jump_flag))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('jump_time', data=np.array(jump_time))
                dset.attrs['axis'] = ax

                dset = handler.create_dataset('jump_auto', data=np.array(jump_auto))
                dset.attrs['axis'] = ax

        else:
            mlog.info("No jumps found for %s acquisition." % acq)
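# A minimal, self-contained sketch of the wavelet-based jump-detection idea
# used above: the CWT of a step with a first-derivative-of-Gaussian wavelet
# ('gaus1', a standard pywt name) has a modulus maximum at the jump that
# persists across scales.  Synthetic signal only; mod_max_finder and
# finger_finder are not reproduced here.
import numpy as np
import pywt

signal = np.zeros(512)
signal[300:] += 1.0                      # a unit jump at sample 300
signal += 0.05 * np.random.randn(512)    # some noise

scales = np.arange(2, 33)
coef, freqs = pywt.cwt(signal, scales, 'gaus1')

# The location of the modulus maximum at each scale should track the jump.
peaks = np.argmax(np.abs(coef), axis=-1)
print(peaks)                             # expect values clustered near 300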