from ch_util import andata
from ch_util import tools
from ch_util import cal_utils
from ch_util import timing
from ch_util import rfi

from complex_gain import sutil
from complex_gain.temps import TempData

###################################################
# default variables
###################################################

DEFAULTS = NameSpace(
    load_yaml_config(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     'defaults.yaml') + ':joint_regression'))

LOG_FILE = os.environ.get(
    'JOINT_REGRESSION_LOG_FILE',
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 'joint_regression.log'))

DEFAULT_LOGGING = {
    'formatters': {
        'std': {
            'format': "%(asctime)s %(levelname)s %(name)s: %(message)s",
            'datefmt': "%m/%d %H:%M:%S"
        },
    },
    'handlers': {
def main(config_file=None, logging_params=DEFAULT_LOGGING): # Load config config = DEFAULTS.deepcopy() if config_file is not None: print(config_file) config.merge(NameSpace(load_yaml_config(config_file))) # Setup logging log.setup_logging(logging_params) logger = log.get_logger(__name__) timer = Timer(logger) # Load data sfile = config.data.filename if os.path.isabs( config.data.filename) else os.path.join(config.directory, config.data.filename) sdata = StabilityData.from_file(sfile) ninput, ntime = sdata['tau'].shape # Load temperature data tfile = (config.temperature.filename if os.path.isabs(config.temperature.filename) else os.path.join( config.directory, config.temperature.filename)) tkeys = ['flag', 'data_flag', 'outlier'] if config.temperature.load: tkeys += config.temperature.load tdata = TempData.from_acq_h5(tfile, datasets=tkeys) # Query layout database inputmap = tools.get_correlator_inputs(ephemeris.unix_to_datetime( np.median(sdata.time[:])), correlator='chime') good_input = np.flatnonzero(np.any(sdata['flags']['tau'][:], axis=-1)) pol = sutil.get_pol(sdata, inputmap) npol = len(pol) mezz_index, crate_index = sutil.get_mezz_and_crate(sdata, inputmap) if config.mezz_ref.enable: phase_ref = [ ipol[mezz_index[ipol] == iref] for ipol, iref in zip(pol, config.mezz_ref.mezz) ] else: phase_ref = config.data.phase_ref # Load timing if config.timing.enable: # Extract filenames from config timing_files = [ tf if os.path.isabs(tf) else os.path.join(config.directory, tf) for tf in config.timing.files ] timing_files_hpf = [ os.path.join(os.path.dirname(tf), 'hpf', os.path.basename(tf)) for tf in timing_files ] timing_files_lpf = [ os.path.join(os.path.dirname(tf), 'lpf', os.path.basename(tf)) for tf in timing_files ] # If requested, add the timing data back into the delay data if config.timing.add.enable: timer.start("Adding timing data to delay measurements.") ns_tau, _, ns_flag, ns_inputs = sutil.get_timing_correction( sdata, timing_files, **config.timing.add.kwargs) index = timing.map_input_to_noise_source(sdata.index_map['input'], ns_inputs) timing_tau = ns_tau[index, :] timing_flag = ns_flag[index, :] for ipol, iref in zip(pol, config.data.phase_ref): timing_tau[ipol, :] = timing_tau[ipol, :] - timing_tau[ iref, np.newaxis, :] timing_flag[ipol, :] = timing_flag[ipol, :] & timing_flag[ iref, np.newaxis, :] sdata['tau'][:] = sdata['tau'][:] + timing_tau sdata['flags']['tau'][:] = sdata['flags']['tau'][:] & timing_flag timer.stop() # Extract the dependent variables from the timing dataset timer.start("Calculating timing dependence.") if config.timing.sep_delay: logger.info("Fitting HPF and LPF timing correction separately.") files = timing_files_hpf files2 = timing_files_lpf else: files2 = None if config.timing.hpf_delay: logger.info("Using HPF timing correction for delay.") files = timing_files_hpf elif config.timing.lpf_delay: logger.info("Using LPF timing correction for delay.") files = timing_files_lpf else: logger.info("Using full timing correction for delay.") files = timing_files kwargs = {} if config.timing.lpf_amp: logger.info("Using LPF timing correction for amplitude.") kwargs['afiles'] = timing_files_lpf elif config.timing.hpf_amp: logger.info("Using HPF timing correction for amplitude.") kwargs['afiles'] = timing_files_hpf else: logger.info("Using full timing correction for amplitude.") kwargs['afiles'] = timing_files for key in ['ns_ref', 'inter_cmn', 'fit_amp', 'ref_amp', 'cmn_amp']: if key in config.timing: kwargs[key] = config.timing[key] xtiming, xtiming_flag, 
xtiming_group = sutil.timing_dependence(
            sdata, files, inputmap, **kwargs)

        if files2 is not None:
            logger.info("Calculating second timing dependence.")
            kwargs['fit_amp'] = False
            xtiming2, xtiming2_flag, xtiming2_group = sutil.timing_dependence(
                sdata, files2, inputmap, **kwargs)

            xtiming = np.concatenate((xtiming, xtiming2), axis=-1)
            xtiming_flag = np.concatenate((xtiming_flag, xtiming2_flag), axis=-1)
            xtiming_group = np.concatenate((xtiming_group, xtiming2_group), axis=-1)

        timer.stop()

    else:
        xtiming = None
        xtiming_flag = None
        xtiming_group = None

    # Reference delay data to mezzanine
    if config.mezz_ref.enable:
        timer.start("Referencing delay measurements to mezzanine.")

        for ipol, iref in zip(pol, config.mezz_ref.mezz):
            this_mezz = ipol[mezz_index[ipol] == iref]

            wmezz = sdata['flags']['tau'][this_mezz, :].astype(np.float32)
            norm = np.sum(wmezz, axis=0)

            taut_mezz = np.sum(wmezz * sdata['tau'][this_mezz, :],
                               axis=0) * tools.invert_no_zero(norm)
            flagt_mezz = norm > 0.0

            sdata['tau'][ipol, :] = sdata['tau'][ipol, :] - taut_mezz[np.newaxis, :]
            sdata['flags']['tau'][ipol, :] = (sdata['flags']['tau'][ipol, :] &
                                              flagt_mezz[np.newaxis, :])

        timer.stop()

    # Load cable monitor
    if config.cable_monitor.enable:
        timer.start("Calculating cable monitor dependence.")

        cbl = timing.TimingCorrection.from_acq_h5(config.cable_monitor.filename)

        kwargs = {'include_diff': config.cable_monitor.include_diff}

        xcable, xcable_flag, xcable_group = sutil.cable_monitor_dependence(
            sdata, cbl, **kwargs)

        timer.stop()

    else:
        xcable = None
        xcable_flag = None
        xcable_group = None

    # Load NS distance
    if config.ns_distance.enable:
        timer.start("Calculating NS distance dependence.")

        kwargs = {}
        kwargs['phase_ref'] = phase_ref
        for key in ['sensor', 'temp_field', 'sep_cyl', 'sep_feed',
                    'include_offset', 'include_ha']:
            if key in config.ns_distance:
                kwargs[key] = config.ns_distance[key]

        if config.ns_distance.use_cable_monitor:
            kwargs['is_cable_monitor'] = True
            kwargs['use_alpha'] = config.ns_distance.use_alpha
            nsx = timing.TimingCorrection.from_acq_h5(config.cable_monitor.filename)
        else:
            kwargs['is_cable_monitor'] = False
            nsx = tdata

        xdist, xdist_flag, xdist_group = sutil.ns_distance_dependence(
            sdata, nsx, inputmap, **kwargs)

        if (config.ns_distance.deriv is not None) and (config.ns_distance.deriv > 0):

            for dd in range(1, config.ns_distance.deriv + 1):

                d_xdist, d_xdist_flag, d_xdist_group = sutil.ns_distance_dependence(
                    sdata, tdata, inputmap, deriv=dd, **kwargs)

                tind = np.atleast_1d(1)
                xdist = np.concatenate((xdist, d_xdist[:, :, tind]), axis=-1)
                xdist_flag = np.concatenate((xdist_flag, d_xdist_flag[:, :, tind]),
                                            axis=-1)
                xdist_group = np.concatenate((xdist_group, d_xdist_group[:, tind]),
                                             axis=-1)

        timer.stop()

    else:
        xdist = None
        xdist_flag = None
        xdist_group = None

    # Load temperatures
    if config.temperature.enable:
        timer.start("Calculating temperature dependence.")

        xtemp, xtemp_flag, xtemp_group, xtemp_name = sutil.temperature_dependence(
            sdata, tdata, config.temperature.sensor,
            field=config.temperature.temp_field,
            inputmap=inputmap,
            phase_ref=phase_ref,
            check_hut=config.temperature.check_hut)

        if (config.temperature.deriv is not None) and (config.temperature.deriv > 0):

            for dd in range(1, config.temperature.deriv + 1):

                d_xtemp, d_xtemp_flag, d_xtemp_group, d_xtemp_name = sutil.temperature_dependence(
                    sdata, tdata, config.temperature.sensor,
                    field=config.temperature.temp_field,
                    deriv=dd,
                    inputmap=inputmap,
                    phase_ref=phase_ref,
                    check_hut=config.temperature.check_hut)

                xtemp = np.concatenate((xtemp, d_xtemp), axis=-1)
                xtemp_flag = np.concatenate((xtemp_flag, d_xtemp_flag), axis=-1)
                xtemp_group = np.concatenate((xtemp_group, d_xtemp_group), axis=-1)
                xtemp_name += d_xtemp_name

        timer.stop()

    else:
        xtemp = None
        xtemp_flag = None
        xtemp_group = None
        xtemp_name = None

    # Combine into single feature matrix
    x, coeff_name = _concatenate(xdist, xtemp, xcable, xtiming, name_xtemp=xtemp_name)
    x_group, _ = _concatenate(xdist_group, xtemp_group, xcable_group, xtiming_group)
    x_flag, _ = _concatenate(xdist_flag, xtemp_flag, xcable_flag, xtiming_flag)
    x_flag = np.all(x_flag, axis=-1) & sdata.flags['tau'][:]

    nfeature = x.shape[-1]
    logger.info("Fitting %d features." % nfeature)

    # Save data
    if config.preliminary_save.enable:

        if config.preliminary_save.filename is not None:
            ofile = (config.preliminary_save.filename
                     if os.path.isabs(config.preliminary_save.filename) else
                     os.path.join(config.directory, config.preliminary_save.filename))
        else:
            ofile = os.path.splitext(sfile)[0] + '_%s.h5' % config.preliminary_save.suffix

        sdata.save(ofile, mode='w')

    # Subtract mean
    if config.mean_subtract:
        timer.start("Subtracting mean value.")

        tau, mu_tau, mu_tau_flag = sutil.mean_subtract(
            sdata, sdata['tau'][:], x_flag, use_calibrator=True)

        mu_x = np.zeros(mu_tau.shape + (nfeature,), dtype=x.dtype)
        mu_x_flag = np.zeros(mu_tau.shape + (nfeature,), dtype=np.bool)
        x_no_mu = x.copy()
        for ff in range(nfeature):
            x_no_mu[..., ff], mu_x[..., ff], mu_x_flag[..., ff] = sutil.mean_subtract(
                sdata, x[:, :, ff], x_flag, use_calibrator=True)

        timer.stop()

    else:
        x_no_mu = x.copy()
        tau = sdata['tau'][:].copy()

    # Calculate unique days
    csd_uniq, bmap = np.unique(sdata['csd'][:], return_inverse=True)
    ncsd = csd_uniq.size

    # Prepare unique sources
    classification = np.char.add(np.char.add(sdata['calibrator'][:], '/'),
                                 sdata['source'][:])

    # If requested, load existing coefficients
    if config.coeff is not None:
        coeff = andata.BaseData.from_acq_h5(config.coeff)
        evaluate_only = True
    else:
        evaluate_only = False

    # If requested, set up bootstrapping
    if config.bootstrap.enable:
        nboot = config.bootstrap.number
        nchoices = ncsd if config.bootstrap.by_transit else ntime
        nsample = int(config.bootstrap.fraction * nchoices)

        bindex = np.zeros((nboot, nsample), dtype=np.int)
        for roll in range(nboot):
            bindex[roll, :] = np.sort(
                np.random.choice(nchoices, size=nsample,
                                 replace=config.bootstrap.replace))
    else:
        nboot = 1
        bindex = np.arange(ntime, dtype=np.int)[np.newaxis, :]

    # Prepare output
    if config.output.directory is not None:
        output_dir = config.output.directory
    else:
        output_dir = config.data.directory

    if config.output.suffix is not None:
        output_suffix = config.output.suffix
    else:
        output_suffix = os.path.splitext(os.path.basename(config.data.filename))[0]

    # Perform joint fit
    for bb, bind in enumerate(bindex):

        if config.bootstrap.enable and config.bootstrap.by_transit:
            tind = np.concatenate(tuple([np.flatnonzero(bmap == ii) for ii in bind]))
        else:
            tind = bind

        ntime = tind.size

        if config.jackknife.enable:
            start = (int(config.jackknife.start * ncsd)
                     if config.jackknife.start <= 1.0 else config.jackknife.start)
            end = (int(config.jackknife.end * ncsd)
                   if config.jackknife.end <= 1.0 else config.jackknife.end)

            time_flag_fit = (bmap >= start) & (bmap < end)

            if config.jackknife.restrict_stat:
                time_flag_stat = np.logical_not(time_flag_fit)
            else:
                time_flag_stat = np.ones(ntime, dtype=np.bool)
        else:
            time_flag_fit = np.ones(ntime, dtype=np.bool)
            time_flag_stat = np.ones(ntime, dtype=np.bool)

        logger.info(
            "Fitting data between %s (CSD %d) and %s (CSD %d)" %
            (ephemeris.unix_to_datetime(np.min(
sdata.time[tind[time_flag_fit]])).strftime("%Y-%m-%d"), np.min(sdata['csd'][:][tind[time_flag_fit]]), ephemeris.unix_to_datetime(np.max( sdata.time[tind[time_flag_fit]])).strftime("%Y-%m-%d"), np.max(sdata['csd'][:][tind[time_flag_fit]]))) logger.info( "Calculating statistics from data between %s (CSD %d) and %s (CSD %d)" % (ephemeris.unix_to_datetime( np.min(sdata.time[tind[time_flag_stat]])).strftime("%Y-%m-%d"), np.min(sdata['csd'][:][tind[time_flag_stat]]), ephemeris.unix_to_datetime( np.max( sdata.time[tind[time_flag_stat]])).strftime("%Y-%m-%d"), np.max(sdata['csd'][:][tind[time_flag_stat]]))) if evaluate_only: timer.start("Evaluating coefficients provided.") fitter = sutil.JointTempEvaluation( x_no_mu[:, tind, :], tau[:, tind], coeff['coeff'][:], flag=x_flag[:, tind], coeff_name=coeff.index_map['feature'][:], feature_name=coeff_name, intercept=coeff['intercept'][:], intercept_name=coeff.index_map['classification'][:], classification=classification[tind]) timer.stop() else: timer.start("Setting up fit. Bootstrap %d of %d." % (bb + 1, nboot)) fitter = sutil.JointTempRegression( x_no_mu[:, tind, :], tau[:, tind], x_group, flag=x_flag[:, tind], classification=classification[tind], coeff_name=coeff_name) timer.stop() timer.start("Performing fit. Bootstrap %d of %d." % (bb + 1, nboot)) fitter.fit_temp(time_flag=time_flag_fit, **config.fit_options) timer.stop() # If bootstrapping, append counter to filename if config.bootstrap.enable: output_suffix_bb = output_suffix + "_bootstrap_%04d" % ( config.bootstrap.index_start + bb, ) with open( os.path.join(output_dir, "bootstrap_index_%s.json" % output_suffix_bb), 'w') as jhandler: json.dump({ "bind": bind.tolist(), "tind": tind.tolist() }, jhandler) else: output_suffix_bb = output_suffix # Save statistics to file if config.output.stat: # If requested, break the model up into its various components for calculating statistics stat_key = ['data', 'model', 'resid'] if config.refine_model.enable: stat_add = fitter.refine_model(config.refine_model.include) stat_key += stat_add # Redefine axes bdata = StabilityData() for dset in ["source", "csd", "calibrator", "calibrator_time"]: bdata.create_dataset(dset, data=sdata[dset][tind]) bdata.create_index_map("time", sdata.index_map["time"][tind]) bdata.create_index_map("input", sdata.index_map["input"][:]) bdata.attrs["calibrator"] = sdata.attrs.get("calibrator", "CYG_A") # Calculate statistics stat = {} for statistic in ['std', 'mad']: for attr in stat_key: for ref, ref_common in zip(['mezz', 'cmn'], [False, True]): stat[(statistic, attr, ref)] = sutil.short_long_stat( bdata, getattr(fitter, attr), fitter._flag & time_flag_stat[np.newaxis, :], stat=statistic, ref_common=ref_common, pol=pol) output_filename = os.path.join(output_dir, "stat_%s.h5" % output_suffix_bb) write_stat(bdata, stat, fitter, output_filename) # Save coefficients to file if config.output.coeff: output_filename = os.path.join(output_dir, "coeff_%s.h5" % output_suffix_bb) write_coeff(sdata, fitter, output_filename) # Save residuals to file if config.output.resid: output_filename = os.path.join(output_dir, "resid_%s.h5" % output_suffix_bb) write_resid(sdata, fitter, output_filename) del fitter gc.collect()
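
# ---------------------------------------------------------------------------
# Illustrative note (not executed by the pipeline): judging from how the
# fitter outputs are consumed above, sutil.JointTempRegression fits a linear
# model in the assembled feature matrix with a per-classification offset,
# roughly
#
#   model[i, t] ~ sum_f coeff[i, f] * x_no_mu[i, t, f] + intercept[i, class(t)]
#   resid[i, t] = tau[i, t] - model[i, t]
#
# e.g. given coeff with shape (ninput, nfeature) and x_no_mu with shape
# (ninput, ntime, nfeature):
#
#   model = np.sum(coeff[:, np.newaxis, :] * x_no_mu, axis=-1)
#
# The exact behaviour (grouping via x_group, treatment of flags) lives in
# complex_gain.sutil and may differ in detail.
# ---------------------------------------------------------------------------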
def main(config_file=None, logging_params=DEFAULT_LOGGING): # Load config config = DEFAULTS.deepcopy() if config_file is not None: config.merge(NameSpace(load_yaml_config(config_file))) # Setup logging log.setup_logging(logging_params) logger = log.get_logger(__name__) ## Load data for flagging # Load fpga restarts time_fpga_restart = [] if config.fpga_restart_file is not None: with open(config.fpga_restart_file, 'r') as handler: for line in handler: time_fpga_restart.append( ephemeris.datetime_to_unix( ephemeris.timestr_to_datetime(line.split('_')[0]))) time_fpga_restart = np.array(time_fpga_restart) # Load housekeeping flag if config.housekeeping_file is not None: ftemp = TempData.from_acq_h5(config.housekeeping_file, datasets=["time_flag"]) else: ftemp = None # Load jump data if config.jump_file is not None: with h5py.File(config.jump_file, 'r') as handler: jump_time = handler["time"][:] jump_size = handler["jump_size"][:] else: jump_time = None jump_size = None # Load rain data if config.rain_file is not None: with h5py.File(config.rain_file, 'r') as handler: rain_ranges = handler["time_range_conservative"][:] else: rain_ranges = [] # Load data flags data_flags = {} if config.data_flags: finder.connect_database() flag_types = finder.DataFlagType.select() possible_data_flags = [] for ft in flag_types: possible_data_flags.append(ft.name) if ft.name in config.data_flags: new_data_flags = finder.DataFlag.select().where( finder.DataFlag.type == ft) data_flags[ft.name] = list(new_data_flags) # Set desired range of time start_time = (ephemeris.datetime_to_unix( datetime.datetime( *config.start_date)) if config.start_date is not None else None) end_time = (ephemeris.datetime_to_unix(datetime.datetime( *config.end_date)) if config.end_date is not None else None) ## Find gain files files = {} for src in config.sources: files[src] = sorted( glob.glob( os.path.join(config.directory, src.lower(), "%s_%s_lsd_*.h5" % ( config.prefix, src.lower(), )))) csd = {} for src in config.sources: csd[src] = np.array( [int(os.path.splitext(ff)[0][-4:]) for ff in files[src]]) for src in config.sources: logger.info("%s: %d files" % (src, len(csd[src]))) ## Remove files that occur during flag csd_flag = {} for src in config.sources: body = ephemeris.source_dictionary[src] csd_flag[src] = np.ones(csd[src].size, dtype=np.bool) for ii, cc in enumerate(csd[src][:]): ttrans = ephemeris.transit_times(body, ephemeris.csd_to_unix(cc))[0] if (start_time is not None) and (ttrans < start_time): csd_flag[src][ii] = False continue if (end_time is not None) and (ttrans > end_time): csd_flag[src][ii] = False continue # If requested, remove daytime transits if not config.include_daytime.get( src, config.include_daytime.default) and daytime_flag( ttrans)[0]: logger.info("%s CSD %d: daytime transit" % (src, cc)) csd_flag[src][ii] = False continue # Remove transits during HKP drop out if ftemp is not None: itemp = np.flatnonzero( (ftemp.time[:] >= (ttrans - config.transit_window)) & (ftemp.time[:] <= (ttrans + config.transit_window))) tempflg = ftemp['time_flag'][itemp] if (tempflg.size == 0) or ((np.sum(tempflg, dtype=np.float32) / float(tempflg.size)) < 0.50): logger.info("%s CSD %d: no housekeeping" % (src, cc)) csd_flag[src][ii] = False continue # Remove transits near jumps if jump_time is not None: njump = np.sum((jump_size > config.min_jump_size) & (jump_time > (ttrans - config.jump_window)) & (jump_time < ttrans)) if njump > config.max_njump: logger.info("%s CSD %d: %d jumps before" % (src, cc, njump)) csd_flag[src][ii] = 
False continue # Remove transits near rain for rng in rain_ranges: if (((ttrans - config.transit_window) <= rng[1]) and ((ttrans + config.transit_window) >= rng[0])): logger.info("%s CSD %d: during rain" % (src, cc)) csd_flag[src][ii] = False break # Remove transits during data flag for name, flag_list in data_flags.items(): if csd_flag[src][ii]: for flg in flag_list: if (((ttrans - config.transit_window) <= flg.finish_time) and ((ttrans + config.transit_window) >= flg.start_time)): logger.info("%s CSD %d: %s flag" % (src, cc, name)) csd_flag[src][ii] = False break # Print number of files left after flagging for src in config.sources: logger.info("%s: %d files (after flagging)" % (src, np.sum(csd_flag[src]))) ## Construct pair wise differences npair = len(config.diff_pair) shift = [nd * 24.0 * 3600.0 for nd in config.nday_shift] calmap = [] calpair = [] for (tsrc, csrc), sh in zip(config.diff_pair, shift): body_test = ephemeris.source_dictionary[tsrc] body_cal = ephemeris.source_dictionary[csrc] for ii, cc in enumerate(csd[tsrc]): if csd_flag[tsrc][ii]: test_transit = ephemeris.transit_times( body_test, ephemeris.csd_to_unix(cc))[0] cal_transit = ephemeris.transit_times(body_cal, test_transit + sh)[0] cal_csd = int(np.fix(ephemeris.unix_to_csd(cal_transit))) ttrans = np.sort([test_transit, cal_transit]) if cal_csd in csd[csrc]: jj = list(csd[csrc]).index(cal_csd) if csd_flag[csrc][jj] and not np.any( (time_fpga_restart >= ttrans[0]) & (time_fpga_restart <= ttrans[1])): calmap.append([ii, jj]) calpair.append([tsrc, csrc]) calmap = np.array(calmap) calpair = np.array(calpair) ntransit = calmap.shape[0] logger.info("%d total transit pairs" % ntransit) for ii in range(ntransit): t1 = ephemeris.transit_times( ephemeris.source_dictionary[calpair[ii, 0]], ephemeris.csd_to_unix(csd[calpair[ii, 0]][calmap[ii, 0]]))[0] t2 = ephemeris.transit_times( ephemeris.source_dictionary[calpair[ii, 1]], ephemeris.csd_to_unix(csd[calpair[ii, 1]][calmap[ii, 1]]))[0] logger.info("%s (%d) - %s (%d): %0.1f hr" % (calpair[ii, 0], csd_flag[calpair[ii, 0]][calmap[ii, 0]], calpair[ii, 1], csd_flag[calpair[ii, 1]][calmap[ii, 1]], (t1 - t2) / 3600.0)) # Determine unique diff pairs diff_name = np.array(['%s/%s' % tuple(cp) for cp in calpair]) uniq_diff, lbl_diff, cnt_diff = np.unique(diff_name, return_inverse=True, return_counts=True) ndiff = uniq_diff.size for ud, udcnt in zip(uniq_diff, cnt_diff): logger.info("%s: %d transit pairs" % (ud, udcnt)) ## Load gains inputmap = tools.get_correlator_inputs(datetime.datetime.utcnow(), correlator='chime') ninput = len(inputmap) nfreq = 1024 # Set up gain arrays gain = np.zeros((2, nfreq, ninput, ntransit), dtype=np.complex64) weight = np.zeros((2, nfreq, ninput, ntransit), dtype=np.float32) input_sort = np.zeros((2, ninput, ntransit), dtype=np.int) kcsd = np.zeros((2, ntransit), dtype=np.float32) timestamp = np.zeros((2, ntransit), dtype=np.float64) is_daytime = np.zeros((2, ntransit), dtype=np.bool) for tt in range(ntransit): for kk, (src, ind) in enumerate(zip(calpair[tt], calmap[tt])): body = ephemeris.source_dictionary[src] filename = files[src][ind] logger.info("%s: %s" % (src, filename)) temp = containers.StaticGainData.from_file(filename) freq = temp.freq[:] inputs = temp.input[:] isort = reorder_inputs(inputmap, inputs) inputs = inputs[isort] gain[kk, :, :, tt] = temp.gain[:, isort] weight[kk, :, :, tt] = temp.weight[:, isort] input_sort[kk, :, tt] = isort kcsd[kk, tt] = temp.attrs['lsd'] timestamp[kk, tt] = ephemeris.transit_times( body, ephemeris.csd_to_unix(kcsd[kk, 
tt]))[0] is_daytime[kk, tt] = daytime_flag(timestamp[kk, tt])[0] if np.any(isort != np.arange(isort.size)): logger.info("Input ordering has changed: %s" % ephemeris.unix_to_datetime( timestamp[kk, tt]).strftime("%Y-%m-%d")) logger.info("") inputs = np.array([(inp.id, inp.input_sn) for inp in inputmap], dtype=[('chan_id', 'u2'), ('correlator_input', 'S32')]) ## Load input flags inpflg = np.ones((2, ninput, ntransit), dtype=np.bool) min_flag_time = np.min(timestamp) - 7.0 * 24.0 * 60.0 * 60.0 max_flag_time = np.max(timestamp) + 7.0 * 24.0 * 60.0 * 60.0 flaginput_files = sorted( glob.glob( os.path.join(config.flaginput_dir, "*" + config.flaginput_suffix, "*.h5"))) if flaginput_files: logger.info("Found %d flaginput files." % len(flaginput_files)) tmp = andata.FlagInputData.from_acq_h5(flaginput_files, datasets=()) start, stop = [ int(yy) for yy in np.percentile( np.flatnonzero((tmp.time[:] >= min_flag_time) & (tmp.time[:] <= max_flag_time)), [0, 100]) ] cont = andata.FlagInputData.from_acq_h5(flaginput_files, start=start, stop=stop, datasets=['flag']) for kk in range(2): inpflg[kk, :, :] = cont.resample('flag', timestamp[kk], transpose=True) logger.info("Flaginput time offsets in minutes (pair %d):" % kk) logger.info( str( np.fix((cont.time[cont.search_update_time(timestamp[kk])] - timestamp[kk]) / 60.0).astype(np.int))) # Sort flags so they are in same order for tt in range(ntransit): for kk in range(2): inpflg[kk, :, tt] = inpflg[kk, input_sort[kk, :, tt], tt] # Do not apply input flag to phase reference for ii in config.index_phase_ref: inpflg[:, ii, :] = True ## Flag out gains with high uncertainty and frequencies with large fraction of data flagged frac_err = tools.invert_no_zero(np.sqrt(weight) * np.abs(gain)) flag = np.all((weight > 0.0) & (np.abs(gain) > 0.0) & (frac_err < config.max_uncertainty), axis=0) freq_flag = ((np.sum(flag, axis=(1, 2), dtype=np.float32) / float(np.prod(flag.shape[1:]))) > config.freq_threshold) if config.apply_rfi_mask: freq_flag &= np.logical_not(rfi.frequency_mask(freq)) flag = flag & freq_flag[:, np.newaxis, np.newaxis] good_freq = np.flatnonzero(freq_flag) logger.info("Number good frequencies %d" % good_freq.size) ## Generate flags with more conservative cuts on frequency c_flag = flag & np.all(frac_err < config.conservative.max_uncertainty, axis=0) c_freq_flag = ((np.sum(c_flag, axis=(1, 2), dtype=np.float32) / float(np.prod(c_flag.shape[1:]))) > config.conservative.freq_threshold) if config.conservative.apply_rfi_mask: c_freq_flag &= np.logical_not(rfi.frequency_mask(freq)) c_flag = c_flag & c_freq_flag[:, np.newaxis, np.newaxis] c_good_freq = np.flatnonzero(c_freq_flag) logger.info("Number good frequencies (conservative thresholds) %d" % c_good_freq.size) ## Apply input flags flag &= np.all(inpflg[:, np.newaxis, :, :], axis=0) ## Update flags based on beam flag if config.beam_flag_file is not None: dbeam = andata.BaseData.from_acq_h5(config.beam_flag_file) db_csd = np.floor(ephemeris.unix_to_csd(dbeam.index_map['time'][:])) for ii, name in enumerate(config.beam_flag_datasets): logger.info("Applying %s beam flag." % name) if not ii: db_flag = dbeam.flags[name][:] else: db_flag &= dbeam.flags[name][:] cnt = 0 for ii, dbc in enumerate(db_csd): this_csd = np.flatnonzero(np.any(kcsd == dbc, axis=0)) if this_csd.size > 0: logger.info("Beam flag for %d matches %s." 
% (dbc, str(kcsd[:, this_csd]))) flag[:, :, this_csd] &= db_flag[np.newaxis, :, ii, np.newaxis] cnt += 1 logger.info("Applied %0.1f percent of the beam flags" % (100.0 * cnt / float(db_csd.size), )) ## Flag inputs with large amount of missing data input_frac_flagged = ( np.sum(flag[good_freq, :, :], axis=(0, 2), dtype=np.float32) / float(good_freq.size * ntransit)) input_flag = input_frac_flagged > config.input_threshold for ii in config.index_phase_ref: logger.info("Phase reference %d has %0.3f fraction of data flagged." % (ii, input_frac_flagged[ii])) input_flag[ii] = True good_input = np.flatnonzero(input_flag) flag = flag & input_flag[np.newaxis, :, np.newaxis] logger.info("Number good inputs %d" % good_input.size) ## Calibrate gaincal = gain[0] * tools.invert_no_zero(gain[1]) frac_err_cal = np.sqrt(frac_err[0]**2 + frac_err[1]**2) count = np.sum(flag, axis=-1, dtype=np.int) stat_flag = count > config.min_num_transit ## Calculate phase amp = np.abs(gaincal) phi = np.angle(gaincal) ## Calculate polarisation groups pol_dict = {'E': 'X', 'S': 'Y'} cyl_dict = {2: 'A', 3: 'B', 4: 'C', 5: 'D'} if config.group_by_cyl: group_id = [ (inp.pol, inp.cyl) if tools.is_chime(inp) and (ii in good_input) else None for ii, inp in enumerate(inputmap) ] else: group_id = [ inp.pol if tools.is_chime(inp) and (ii in good_input) else None for ii, inp in enumerate(inputmap) ] ugroup_id = sorted([uidd for uidd in set(group_id) if uidd is not None]) ngroup = len(ugroup_id) group_list_noref = [ np.array([ gg for gg, gid in enumerate(group_id) if (gid == ugid) and gg not in config.index_phase_ref ]) for ugid in ugroup_id ] group_list = [ np.array([gg for gg, gid in enumerate(group_id) if gid == ugid]) for ugid in ugroup_id ] if config.group_by_cyl: group_str = [ "%s-%s" % (pol_dict[pol], cyl_dict[cyl]) for pol, cyl in ugroup_id ] else: group_str = [pol_dict[pol] for pol in ugroup_id] index_phase_ref = [] for gstr, igroup in zip(group_str, group_list): candidate = [ii for ii in config.index_phase_ref if ii in igroup] if len(candidate) != 1: index_phase_ref.append(None) else: index_phase_ref.append(candidate[0]) logger.info( "Phase reference: %s" % ', '.join(['%s = %s' % tpl for tpl in zip(group_str, index_phase_ref)])) ## Apply thermal correction to amplitude if config.amp_thermal.enabled: logger.info("Applying thermal correction.") # Load the temperatures tdata = TempData.from_acq_h5(config.amp_thermal.filename) index = tdata.search_sensors(config.amp_thermal.sensor)[0] temp = tdata.datasets[config.amp_thermal.field][index] temp_func = scipy.interpolate.interp1d(tdata.time, temp, **config.amp_thermal.interp) itemp = temp_func(timestamp) dtemp = itemp[0] - itemp[1] flag_func = scipy.interpolate.interp1d( tdata.time, tdata.datasets['flag'][index].astype(np.float32), **config.amp_thermal.interp) dtemp_flag = np.all(flag_func(timestamp) == 1.0, axis=0) flag &= dtemp_flag[np.newaxis, np.newaxis, :] for gstr, igroup in zip(group_str, group_list): pstr = gstr[0] thermal_coeff = np.polyval(config.amp_thermal.coeff[pstr], freq) gthermal = 1.0 + thermal_coeff[:, np.newaxis, np.newaxis] * dtemp[ np.newaxis, np.newaxis, :] amp[:, igroup, :] *= tools.invert_no_zero(gthermal) ## Compute common mode if config.subtract_common_mode_before: logger.info("Calculating common mode amplitude and phase.") cmn_amp, flag_cmn_amp = compute_common_mode(amp, flag, group_list_noref, median=False) cmn_phi, flag_cmn_phi = compute_common_mode(phi, flag, group_list_noref, median=False) # Subtract common mode (from phase only) 
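        # Note: the common mode is computed over each input group using
        # group_list_noref (the phase-reference inputs are excluded), with
        # median=False here, so presumably a flag-weighted mean.  It is
        # removed from the phase only; each group's phase-reference input
        # inherits the common-mode flag in the loop below.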
logger.info("Subtracting common mode phase.") group_flag = np.zeros((ngroup, ninput), dtype=np.bool) for gg, igroup in enumerate(group_list): group_flag[gg, igroup] = True phi[:, igroup, :] = phi[:, igroup, :] - cmn_phi[:, gg, np.newaxis, :] for iref in index_phase_ref: if (iref is not None) and (iref in igroup): flag[:, iref, :] = flag_cmn_phi[:, gg, :] ## If requested, determine and subtract a delay template if config.fit_delay_before: logger.info("Fitting delay template.") omega = timing.FREQ_TO_OMEGA * freq tau, tau_flag, _ = construct_delay_template( omega, phi, c_flag & flag, min_num_freq_for_delay_fit=config.min_num_freq_for_delay_fit) # Compute residuals logger.info("Subtracting delay template.") phi = phi - tau[np.newaxis, :, :] * omega[:, np.newaxis, np.newaxis] ## Normalize by median over time logger.info("Calculating median amplitude and phase.") med_amp = np.zeros((nfreq, ninput, ndiff), dtype=amp.dtype) med_phi = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype) count_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=np.int) stat_flag_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=np.bool) def weighted_mean(yy, ww, axis=-1): return np.sum(ww * yy, axis=axis) * tools.invert_no_zero( np.sum(ww, axis=axis)) for dd in range(ndiff): this_diff = np.flatnonzero(lbl_diff == dd) this_flag = flag[:, :, this_diff] this_amp = amp[:, :, this_diff] this_amp_err = this_amp * frac_err_cal[:, :, this_diff] * this_flag.astype( np.float32) this_phi = phi[:, :, this_diff] this_phi_err = frac_err_cal[:, :, this_diff] * this_flag.astype( np.float32) count_by_diff[:, :, dd] = np.sum(this_flag, axis=-1, dtype=np.int) stat_flag_by_diff[:, :, dd] = count_by_diff[:, :, dd] > config.min_num_transit if config.weighted_mean == 2: logger.info("Calculating inverse variance weighted mean.") med_amp[:, :, dd] = weighted_mean(this_amp, tools.invert_no_zero(this_amp_err**2), axis=-1) med_phi[:, :, dd] = weighted_mean(this_phi, tools.invert_no_zero(this_phi_err**2), axis=-1) elif config.weighted_mean == 1: logger.info("Calculating uniform weighted mean.") med_amp[:, :, dd] = weighted_mean(this_amp, this_flag.astype(np.float32), axis=-1) med_phi[:, :, dd] = weighted_mean(this_phi, this_flag.astype(np.float32), axis=-1) else: logger.info("Calculating median value.") for ff in range(nfreq): for ii in range(ninput): if np.any(this_flag[ff, ii, :]): med_amp[ff, ii, dd] = wq.median( this_amp[ff, ii, :], this_flag[ff, ii, :].astype(np.float32)) med_phi[ff, ii, dd] = wq.median( this_phi[ff, ii, :], this_flag[ff, ii, :].astype(np.float32)) damp = np.zeros_like(amp) dphi = np.zeros_like(phi) for dd in range(ndiff): this_diff = np.flatnonzero(lbl_diff == dd) damp[:, :, this_diff] = amp[:, :, this_diff] * tools.invert_no_zero( med_amp[:, :, dd, np.newaxis]) - 1.0 dphi[:, :, this_diff] = phi[:, :, this_diff] - med_phi[:, :, dd, np.newaxis] # Compute common mode if not config.subtract_common_mode_before: logger.info("Calculating common mode amplitude and phase.") cmn_amp, flag_cmn_amp = compute_common_mode(damp, flag, group_list_noref, median=True) cmn_phi, flag_cmn_phi = compute_common_mode(dphi, flag, group_list_noref, median=True) # Subtract common mode (from phase only) logger.info("Subtracting common mode phase.") group_flag = np.zeros((ngroup, ninput), dtype=np.bool) for gg, igroup in enumerate(group_list): group_flag[gg, igroup] = True dphi[:, igroup, :] = dphi[:, igroup, :] - cmn_phi[:, gg, np.newaxis, :] for iref in index_phase_ref: if (iref is not None) and (iref in igroup): flag[:, iref, :] = flag_cmn_phi[:, gg, 
:] ## Compute RMS logger.info("Calculating RMS of amplitude and phase.") mad_amp = np.zeros((nfreq, ninput), dtype=amp.dtype) std_amp = np.zeros((nfreq, ninput), dtype=amp.dtype) mad_phi = np.zeros((nfreq, ninput), dtype=phi.dtype) std_phi = np.zeros((nfreq, ninput), dtype=phi.dtype) mad_amp_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=amp.dtype) std_amp_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=amp.dtype) mad_phi_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype) std_phi_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype) for ff in range(nfreq): for ii in range(ninput): this_flag = flag[ff, ii, :] if np.any(this_flag): std_amp[ff, ii] = np.std(damp[ff, ii, this_flag]) std_phi[ff, ii] = np.std(dphi[ff, ii, this_flag]) mad_amp[ff, ii] = 1.48625 * wq.median( np.abs(damp[ff, ii, :]), this_flag.astype(np.float32)) mad_phi[ff, ii] = 1.48625 * wq.median( np.abs(dphi[ff, ii, :]), this_flag.astype(np.float32)) for dd in range(ndiff): this_diff = this_flag & (lbl_diff == dd) if np.any(this_diff): std_amp_by_diff[ff, ii, dd] = np.std(damp[ff, ii, this_diff]) std_phi_by_diff[ff, ii, dd] = np.std(dphi[ff, ii, this_diff]) mad_amp_by_diff[ff, ii, dd] = 1.48625 * wq.median( np.abs(damp[ff, ii, :]), this_diff.astype(np.float32)) mad_phi_by_diff[ff, ii, dd] = 1.48625 * wq.median( np.abs(dphi[ff, ii, :]), this_diff.astype(np.float32)) ## Construct delay template if not config.fit_delay_before: logger.info("Fitting delay template.") omega = timing.FREQ_TO_OMEGA * freq tau, tau_flag, _ = construct_delay_template( omega, dphi, c_flag & flag, min_num_freq_for_delay_fit=config.min_num_freq_for_delay_fit) # Compute residuals logger.info("Subtracting delay template from phase.") resid = (dphi - tau[np.newaxis, :, :] * omega[:, np.newaxis, np.newaxis]) * flag.astype(np.float32) else: resid = dphi tau_count = np.sum(tau_flag, axis=-1, dtype=np.int) tau_stat_flag = tau_count > config.min_num_transit tau_count_by_diff = np.zeros((ninput, ndiff), dtype=np.int) tau_stat_flag_by_diff = np.zeros((ninput, ndiff), dtype=np.bool) for dd in range(ndiff): this_diff = np.flatnonzero(lbl_diff == dd) tau_count_by_diff[:, dd] = np.sum(tau_flag[:, this_diff], axis=-1, dtype=np.int) tau_stat_flag_by_diff[:, dd] = tau_count_by_diff[:, dd] > config.min_num_transit ## Calculate statistics of residuals std_resid = np.zeros((nfreq, ninput), dtype=phi.dtype) mad_resid = np.zeros((nfreq, ninput), dtype=phi.dtype) std_resid_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype) mad_resid_by_diff = np.zeros((nfreq, ninput, ndiff), dtype=phi.dtype) for ff in range(nfreq): for ii in range(ninput): this_flag = flag[ff, ii, :] if np.any(this_flag): std_resid[ff, ii] = np.std(resid[ff, ii, this_flag]) mad_resid[ff, ii] = 1.48625 * wq.median( np.abs(resid[ff, ii, :]), this_flag.astype(np.float32)) for dd in range(ndiff): this_diff = this_flag & (lbl_diff == dd) if np.any(this_diff): std_resid_by_diff[ff, ii, dd] = np.std(resid[ff, ii, this_diff]) mad_resid_by_diff[ff, ii, dd] = 1.48625 * wq.median( np.abs(resid[ff, ii, :]), this_diff.astype(np.float32)) ## Calculate statistics of delay template mad_tau = np.zeros((ninput, ), dtype=phi.dtype) std_tau = np.zeros((ninput, ), dtype=phi.dtype) mad_tau_by_diff = np.zeros((ninput, ndiff), dtype=phi.dtype) std_tau_by_diff = np.zeros((ninput, ndiff), dtype=phi.dtype) for ii in range(ninput): this_flag = tau_flag[ii] if np.any(this_flag): std_tau[ii] = np.std(tau[ii, this_flag]) mad_tau[ii] = 1.48625 * wq.median(np.abs(tau[ii]), this_flag.astype(np.float32)) for dd in 
range(ndiff): this_diff = this_flag & (lbl_diff == dd) if np.any(this_diff): std_tau_by_diff[ii, dd] = np.std(tau[ii, this_diff]) mad_tau_by_diff[ii, dd] = 1.48625 * wq.median( np.abs(tau[ii]), this_diff.astype(np.float32)) ## Define output res = { "timestamp": { "data": timestamp, "axis": ["div", "time"] }, "is_daytime": { "data": is_daytime, "axis": ["div", "time"] }, "csd": { "data": kcsd, "axis": ["div", "time"] }, "pair_map": { "data": lbl_diff, "axis": ["time"] }, "pair_count": { "data": cnt_diff, "axis": ["pair"] }, "gain": { "data": gaincal, "axis": ["freq", "input", "time"] }, "frac_err": { "data": frac_err_cal, "axis": ["freq", "input", "time"] }, "flags/gain": { "data": flag, "axis": ["freq", "input", "time"], "flag": True }, "flags/gain_conservative": { "data": c_flag, "axis": ["freq", "input", "time"], "flag": True }, "flags/count": { "data": count, "axis": ["freq", "input"], "flag": True }, "flags/stat": { "data": stat_flag, "axis": ["freq", "input"], "flag": True }, "flags/count_by_pair": { "data": count_by_diff, "axis": ["freq", "input", "pair"], "flag": True }, "flags/stat_by_pair": { "data": stat_flag_by_diff, "axis": ["freq", "input", "pair"], "flag": True }, "med_amp": { "data": med_amp, "axis": ["freq", "input", "pair"] }, "med_phi": { "data": med_phi, "axis": ["freq", "input", "pair"] }, "flags/group_flag": { "data": group_flag, "axis": ["group", "input"], "flag": True }, "cmn_amp": { "data": cmn_amp, "axis": ["freq", "group", "time"] }, "cmn_phi": { "data": cmn_phi, "axis": ["freq", "group", "time"] }, "amp": { "data": damp, "axis": ["freq", "input", "time"] }, "phi": { "data": dphi, "axis": ["freq", "input", "time"] }, "std_amp": { "data": std_amp, "axis": ["freq", "input"] }, "std_amp_by_pair": { "data": std_amp_by_diff, "axis": ["freq", "input", "pair"] }, "mad_amp": { "data": mad_amp, "axis": ["freq", "input"] }, "mad_amp_by_pair": { "data": mad_amp_by_diff, "axis": ["freq", "input", "pair"] }, "std_phi": { "data": std_phi, "axis": ["freq", "input"] }, "std_phi_by_pair": { "data": std_phi_by_diff, "axis": ["freq", "input", "pair"] }, "mad_phi": { "data": mad_phi, "axis": ["freq", "input"] }, "mad_phi_by_pair": { "data": mad_phi_by_diff, "axis": ["freq", "input", "pair"] }, "tau": { "data": tau, "axis": ["input", "time"] }, "flags/tau": { "data": tau_flag, "axis": ["input", "time"], "flag": True }, "flags/tau_count": { "data": tau_count, "axis": ["input"], "flag": True }, "flags/tau_stat": { "data": tau_stat_flag, "axis": ["input"], "flag": True }, "flags/tau_count_by_pair": { "data": tau_count_by_diff, "axis": ["input", "pair"], "flag": True }, "flags/tau_stat_by_pair": { "data": tau_stat_flag_by_diff, "axis": ["input", "pair"], "flag": True }, "std_tau": { "data": std_tau, "axis": ["input"] }, "std_tau_by_pair": { "data": std_tau_by_diff, "axis": ["input", "pair"] }, "mad_tau": { "data": mad_tau, "axis": ["input"] }, "mad_tau_by_pair": { "data": mad_tau_by_diff, "axis": ["input", "pair"] }, "resid_phi": { "data": resid, "axis": ["freq", "input", "time"] }, "std_resid_phi": { "data": std_resid, "axis": ["freq", "input"] }, "std_resid_phi_by_pair": { "data": std_resid_by_diff, "axis": ["freq", "input", "pair"] }, "mad_resid_phi": { "data": mad_resid, "axis": ["freq", "input"] }, "mad_resid_phi_by_pair": { "data": mad_resid_by_diff, "axis": ["freq", "input", "pair"] }, } ## Create the output container logger.info("Creating StabilityData container.") data = StabilityData() data.create_index_map( "div", np.array(["numerator", "denominator"], dtype=np.string_)) 
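    # The "div" axis indexes the two transits in each pair: gain[0] is the
    # numerator (test source) and gain[1] the denominator (calibrator), as in
    # gaincal = gain[0] * tools.invert_no_zero(gain[1]) above.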
data.create_index_map("pair", np.array(uniq_diff, dtype=np.string_)) data.create_index_map("group", np.array(group_str, dtype=np.string_)) data.create_index_map("freq", freq) data.create_index_map("input", inputs) data.create_index_map("time", timestamp[0, :]) logger.info("Writing datsets to container.") for name, dct in res.iteritems(): is_flag = dct.get('flag', False) if is_flag: dset = data.create_flag(name.split('/')[-1], data=dct['data']) else: dset = data.create_dataset(name, data=dct['data']) dset.attrs['axis'] = np.array(dct['axis'], dtype=np.string_) data.attrs['phase_ref'] = np.array( [iref for iref in index_phase_ref if iref is not None]) # Determine the output filename and save results start_time, end_time = ephemeris.unix_to_datetime( np.percentile(timestamp, [0, 100])) tfmt = "%Y%m%d" night_str = 'night_' if not np.any(is_daytime) else '' output_file = os.path.join( config.output_dir, "%s_%s_%sraw_stability_data.h5" % (start_time.strftime(tfmt), end_time.strftime(tfmt), night_str)) logger.info("Saving results to %s." % output_file) data.save(output_file)
from ch_util import rfi
from ch_util import finder

from ch_pipeline.core import containers
from ch_pipeline.analysis.flagging import daytime_flag

from complex_gain.temps import TempData
from complex_gain.sutil import construct_delay_template, compute_common_mode

###################################################
# default variables
###################################################

DEFAULTS = NameSpace(
    load_yaml_config(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     'defaults.yaml') + ':stability'))

LOG_FILE = os.environ.get(
    'STABILITY_LOG_FILE',
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 'stability.log'))

DEFAULT_LOGGING = {
    'formatters': {
        'std': {
            'format': "%(asctime)s %(levelname)s %(name)s: %(message)s",
            'datefmt': "%m/%d %H:%M:%S"
        },
    },
    'handlers': {
        'stderr': {
def main(filename, config_file=None, logging_params=DEFAULT_LOGGING): # Load config config = DEFAULTS.deepcopy() if config_file is not None: config.merge(NameSpace(load_yaml_config(config_file))) # Setup logging log.setup_logging(logging_params) logger = log.get_logger(__name__) # Load the data dsets = config.datasets + ['flags/%s' % name for name in config.flags ] + ['timestamp', 'pair_map'] logger.info("Requesting datasets: %s" % str(dsets)) data = StabilityData.from_acq_h5(filename, datasets=dsets) logger.info("Loaded datasets: %s" % str(data.datasets.keys())) # Load the temperatures tdata = TempData.from_acq_h5(config.temp_filename) # Interpolate requested temperatures to time of transits if config.sensors is not None: index = np.sort( np.concatenate( tuple([tdata.search_sensors(name) for name in config.sensors]))) else: index = slice(None) temp = tdata.datasets[config.temp_field][index] temp_func = scipy.interpolate.interp1d(tdata.time, temp, axis=-1, **config.interp) itemp = temp_func(data.datasets['timestamp'][:]) # Difference temperatures between two transits feature = [] dtemp = np.zeros((itemp.shape[-1], itemp.shape[0]), dtype=itemp.dtype) for isensor, stemp in enumerate(itemp): if config.is_ns_dist and config.is_ns_dist[isensor]: feature.append(tdata.sensor[index[isensor]] + '_ns_dist') coeff = np.array([[ np.sin( np.radians(FluxCatalog[ss].dec - ephemeris.CHIMELATITUDE)) for ss in pair.decode("UTF-8").split('/') ] for pair in data.index_map['pair'][data.datasets['pair_map']]]).T dtemp[:, isensor] = coeff[0] * stemp[0] - coeff[1] * stemp[1] else: feature.append(tdata.sensor[index[isensor]]) dtemp[:, isensor] = stemp[0] - stemp[1] # Generate flags for the temperature data flag_func = scipy.interpolate.interp1d( tdata.time, tdata.datasets['flag'][index].astype(np.float32), axis=-1, **config.interp) dtemp_flag = np.all(flag_func(data.datasets['timestamp'][:]) == 1.0, axis=(0, 1)) # Add temperature information to data object data.create_index_map('feature', np.array(feature, dtype=np.string_)) dset = data.create_dataset("temp", data=dtemp) dset.attrs['axis'] = np.array(['time', 'feature'], dtype=np.string_) dset = data.create_flag("temp", data=dtemp_flag) dset.attrs['axis'] = np.array(['time', 'feature'], dtype=np.string_) # Perform the fit for dkey, fkey in zip(config.datasets, config.flags): logger.info("Now fitting %s using %s flags" % (dkey, fkey)) this_data = data.datasets[dkey][:] expand = tuple([None] * (this_data.ndim - 1) + [slice(None)]) this_flag = data.flags[fkey][:] & dtemp_flag[expand] fitter = TempRegression(dtemp, this_data, flag=this_flag) fitter.process() # Save results for out in ['model', 'resid']: dset = data.create_dataset('_'.join([config.prefix, out, dkey]), data=getattr(fitter, out)) dset.attrs['axis'] = data.datasets[dkey].attrs['axis'].copy() for stat in ['mad', 'std']: dset = data.create_dataset( '_'.join([stat, config.prefix, out, dkey]), data=getattr(fitter, '_'.join([stat, out]))) dset.attrs['axis'] = data.datasets[dkey].attrs[ 'axis'][:-1].copy() for out in ['intercept', 'number']: dset = data.create_dataset('_'.join([config.prefix, out, dkey]), data=getattr(fitter, out)) dset.attrs['axis'] = data.datasets[dkey].attrs['axis'][:-1].copy() dset = data.create_dataset('_'.join([config.prefix, 'coeff', dkey]), data=fitter.coeff) dset.attrs['axis'] = np.array( list(data.datasets[dkey].attrs['axis'][:-1]) + ['feature'], dtype=np.string_) # Save the results to disk output_filename = os.path.splitext( filename)[0] + '_' + config.output_suffix + '.h5' 
data.save(output_filename)
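
# ---------------------------------------------------------------------------
# Illustrative note: inferred from how the fitter attributes are used above,
# TempRegression fits each dataset y (time along its last axis) as a linear
# function of the differenced temperatures,
#
#   model[..., t] ~ intercept[...] + sum_f coeff[..., f] * dtemp[t, f]
#   resid = y - model
#
# with the 'std_*' and 'mad_*' datasets summarising the residual scatter over
# time.  The actual behaviour is defined by the TempRegression class.
# ---------------------------------------------------------------------------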
from wtl.namespace import NameSpace
from wtl.config import load_yaml_config

from ch_util.fluxcat import FluxCatalog
from ch_util import ephemeris

from temps import TempData
from stability import StabilityData

###################################################
# default variables
###################################################

DEFAULTS = NameSpace(
    load_yaml_config(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     'defaults.yaml') + ':regress_temp'))

LOG_FILE = os.environ.get(
    'REGRESS_TEMP_LOG_FILE',
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 'regress_temp.log'))

DEFAULT_LOGGING = {
    'formatters': {
        'std': {
            'format': "%(asctime)s %(levelname)s %(name)s: %(message)s",
            'datefmt': "%m/%d %H:%M:%S"
        },
    },
    'handlers': {
def main(config_file=None, logging_params=DEFAULT_LOGGING): # Setup logging log.setup_logging(logging_params) mlog = log.get_logger(__name__) # Set config config = DEFAULTS.deepcopy() if config_file is not None: config.merge(NameSpace(load_yaml_config(config_file))) # Set niceness current_niceness = os.nice(0) os.nice(config.niceness - current_niceness) mlog.info('Changing process niceness from %d to %d. Confirm: %d' % (current_niceness, config.niceness, os.nice(0))) # Find acquisition files acq_files = sorted( glob.glob(os.path.join(config.data_dir, config.acq, "*.h5"))) nfiles = len(acq_files) # Find cal files if config.time_iter and (config.cal_acq is not None): cal_files = sorted( glob.glob(os.path.join(config.data_dir, config.cal_acq, "*.h5"))) ncal_files = len(cal_files) mlog.info('Found %d chimecal files.' % ncal_files) cal_rdr = andata.CorrData.from_acq_h5(cal_files, datasets=()) else: ncal_files = 0 # Create transit tracker transit_tracker = TransitTracker(nsigma=config.nsigma, shift=config.shift) for name in config.all_sources: transit_tracker[name] = FluxCatalog[name].skyfield for aa in acq_files: transit_tracker.add_file(aa) transit_files = transit_tracker.get_transits() for src, csd, is_day, files, aa, bb in transit_files: mlog.info("%s | CSD %d | %d | (%d, %d)" % (src, csd, is_day, aa, bb)) for ff in files: mlog.info("%s" % ff) mlog.info(''.join(['-'] * 80)) # Specify some parameters for algorithm N = 2048 noffset = len(config.offsets) if config.sep_pol: rank = 1 cross_pol = False pol = np.array(['S', 'E']) pol_s = np.array( [rr + 256 * xx for xx in range(0, 8, 2) for rr in range(256)]) pol_e = np.array( [rr + 256 * xx for xx in range(1, 8, 2) for rr in range(256)]) prod_ss = [] prod_ee = [] else: rank = 2 cross_pol = config.cross_pol pol = np.array(['all']) npol = pol.size # Create file prefix and suffix prefix = [] prefix.append("gain_solutions") if config.output_prefix is not None: prefix.append(config.output_prefix) prefix = '_'.join(prefix) suffix = [] suffix.append("pol_%s" % '_'.join(pol)) if config.time_iter: suffix.append("time_iter") else: suffix.append("niter_%d" % config.niter) if cross_pol: suffix.append("zerocross") else: suffix.append("keepcross") if config.normalize: suffix.append("normed") else: suffix.append("notnormed") suffix = '_'.join(suffix) # Loop over source transits for src, csd, is_day, files, start, stop in transit_files: if src not in config.all_sources: continue if (config.good_csd is not None) and (csd not in config.good_csd): continue mlog.info("%s | CSD %d | %d | (%d, %d)" % (src, csd, is_day, start, stop)) nfiles = len(files) output_file = os.path.join( config.output_dir, "%s_%s_CSD_%d_%s.pickle" % (prefix, src, csd, suffix)) mlog.info("Saving to: %s" % output_file) # Get info about this set of files data = andata.CorrData.from_acq_h5(files, datasets=['flags/inputs'], start=start, stop=stop) ra = ephemeris.lsa(data.time) prod = data.prod fstart = config.freq_start if config.freq_start is not None else 0 fstop = config.freq_stop if config.freq_stop is not None else data.freq.size freq_index = range(fstart, fstop) freq = data.freq[freq_index] ntime = ra.size nfreq = freq.size # Determind bad inputs if config.bad_input_file is None or not os.path.isfile( config.bad_input_file): bad_input = np.flatnonzero( ~np.all(data.flags['inputs'][:], axis=-1)) else: with open(config.bad_input_file, 'r') as handler: bad_input = pickle.load(handler) mlog.info("%d inputs flagged as bad." 
% bad_input.size) bad_prod = np.array([ ii for ii, pp in enumerate(prod) if (pp[0] in bad_input) or (pp[1] in bad_input) ]) # Determine time range of each file findex = [] tindex = [] for ii, filename in enumerate(files): subdata = andata.CorrData.from_acq_h5(filename, datasets=()) findex += [ii] * subdata.ntime tindex += range(subdata.ntime) findex = np.array(findex[start:stop]) tindex = np.array(tindex[start:stop]) frng = [] for ii in range(nfiles): this_file = np.flatnonzero(findex == ii) this_tindex = tindex[this_file] frng.append((this_tindex.min(), this_tindex.max() + 1)) # Create arrays to hold the results ores = {} ores['index_map'] = {} ores['index_map']['ra'] = ra ores['index_map']['time'] = data.time ores['index_map']['freq'] = freq ores['index_map']['offsets'] = np.array(config.offsets) ores['index_map']['pol'] = pol ores['evalue'] = np.zeros((noffset, nfreq, ntime, N), dtype=np.float32) ores['resp'] = np.zeros((noffset, nfreq, ntime, N, config.neigen), dtype=np.complex64) ores['resp_err'] = np.zeros((noffset, nfreq, ntime, N, config.neigen), dtype=np.float32) # Loop over frequencies for ff, find in enumerate(freq_index): mlog.info("Freq %d of %d." % (ff + 1, nfreq)) cnt = 0 ev, evec = None, None if ncal_files > 0: ifc = int(np.argmin(np.abs(freq[ff] - cal_rdr.freq))) diff_time = np.abs(data.time[cnt] - cal_rdr.time) good_diff = np.flatnonzero(np.isfinite(diff_time)) itc = int(good_diff[np.argmin(diff_time[good_diff])]) - 1 print good_diff.size print data.time[cnt], cal_rdr.time[itc] cal = andata.CorrData.from_acq_h5(cal_files, datasets=['eval', 'evec'], freq_sel=ifc, start=itc, stop=itc + 2) print cal.time mlog.info( "Using eigenvectors from %d time sample (%0.2f sec offset) to initialize backfill." % (itc, cal.time[0] - data.time[cnt])) mlog.info( "Using eigenvectors for freq %0.2f MHz (for %0.2f MHz)." % (cal.freq[0], freq[ff])) ev = cal['eval'][0, ::-1, 0] evec = cal['evec'][0, ::-1, :, 0].T mlog.info("Initial eval shape %s, evec shape %s" % (str(ev.shape), str(evec.shape))) # Loop over files for ii, filename in enumerate(files): aa, bb = frng[ii] # Loop over times for tt in range(aa, bb): t0 = time.time() mlog.info("Time %d of %d." 
% (cnt + 1, ntime)) # Load visibilities with h5py.File(filename, 'r') as hf: vis = hf['vis'][find, :, tt] # Set bad products equal to zero vis[bad_prod] = 0.0 # Different code if we are separating polarisations if config.sep_pol: if not any(prod_ss): for pind, pp in enumerate(prod): if (pp[0] in pol_s) and (pp[1] in pol_s): prod_ss.append(pind) elif (pp[0] in pol_e) and (pp[1] in pol_e): prod_ee.append(pind) prod_ss = np.array(prod_ss) prod_ee = np.array(prod_ee) mlog.info("Product sizes: %d, %d" % (prod_ss.size, prod_ee.size)) # Loop over polarisations for pp, (input_pol, prod_pol) in enumerate([(pol_s, prod_ss), (pol_e, prod_ee)]): visp = vis[prod_pol] mlog.info("pol %s, visibility size: %d" % (pol[pp], visp.size)) # Loop over offsets for oo, off in enumerate(config.offsets): mlog.info( "pol %s, rank %d, niter %d, offset %d, cross_pol %s, neigen %d, intracyl_diag %d" % (pol[pp], rank, config.niter, off, cross_pol, config.neigen, int(config.intracyl_diag))) ev, evec, rr, rre = solve_gain( visp, cutoff=off, intracyl_diag=config.intracyl_diag, cross_pol=cross_pol, normalize=config.normalize, niter=config.niter, neigen=config.neigen, rank=rank, cyl_size=config.cyl_size, time_iter=config.time_iter, eigenvalue=ev, eigenvector=evec) ores['evalue'][oo, ff, cnt, input_pol] = ev ores['resp'][oo, ff, cnt, input_pol, :] = rr ores['resp_err'][oo, ff, cnt, input_pol, :] = rre else: # Loop over offsets for oo, off in enumerate(config.offsets): mlog.info( "rank %d, niter %d, offset %d, cross_pol %s, neigen %d, intracyl_diag %d" % (rank, config.niter, off, cross_pol, config.neigen, int(config.intracyl_diag))) ev, evec, rr, rre = solve_gain( vis, cutoff=off, intracyl_diag=config.intracyl_diag, cross_pol=cross_pol, normalize=config.normalize, niter=config.niter, neigen=config.neigen, rank=rank, cyl_size=config.cyl_size, time_iter=config.time_iter, eigenvalue=ev, eigenvector=evec) ores['evalue'][oo, ff, cnt, :] = ev ores['resp'][oo, ff, cnt, :, :] = rr ores['resp_err'][oo, ff, cnt, :, :] = rre # Increment time counter cnt += 1 # Print time elapsed mlog.info("Took %0.1f seconds." % (time.time() - t0, )) # Save to pickle file with h5py.File(output_file, 'w') as handler: handler.attrs['src'] = src handler.attrs['csd'] = csd for key, val in ores.iteritems(): if isinstance(val, dict): group = handler.create_group(key) for kk, vv in val.iteritems(): group.create_dataset(kk, data=vv[:]) else: handler.create_dataset(key, data=val[:]) # Remove this source from list if config.single_csd: config.all_sources.remove(src)
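
# ---------------------------------------------------------------------------
# Example of reading back a solution file written above (illustrative only).
# Note the file is HDF5 even though output_file carries a .pickle extension:
#
#   import h5py
#   with h5py.File(output_file, 'r') as handler:
#       evalue = handler['evalue'][:]    # (offset, freq, time, input)
#       resp = handler['resp'][:]        # (offset, freq, time, input, eigen)
#       ra = handler['index_map/ra'][:]
# ---------------------------------------------------------------------------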