def get(args, fmt='numpy'):
    """Load gains and measurement-set data described by generate.py args.

    Parameters
    ----------
    args : namespace-like
        Must provide ``ms`` (table path), ``out`` (gains file name) and
        ``mode``. When ``args.out`` is the empty string it is overwritten,
        in place, with ``f"{args.mode}.npy"``.
    fmt : str
        ``"numpy"`` calls ``.compute()`` on the measurement-set columns and
        casts visibilities, model and weights to complex128. ``"dask"``
        leaves those columns untouched and wraps the gains in a dask array.
        Any other value returns everything unconverted.

    Returns
    -------
    tuple
        ``(tbin_indices, tbin_counts, ant1, ant2, vis, model, weight,
        jones)``.
    """
    # Only the first dataset of the table is used
    dataset = xds_from_table(args.ms)[0]

    # Time-bin indices and counts, with the indices shifted to start at 0
    _, tbin_indices, tbin_counts = chunkify_rows(dataset.TIME, 1)
    tbin_indices -= tbin_indices.min()

    # Antenna columns
    ant1, ant2 = dataset.ANTENNA1.data, dataset.ANTENNA2.data

    # Corrupted data, source models (stacked along a direction axis) and
    # weights, each reduced to the first correlation
    vis = dataset.DATA.data[:, :, 0]
    model = concat_dir_axis(dataset)[:, :, :, 0]
    weight = dataset.WEIGHT.data[:, 0]

    # Fall back to a gains file named after the mode
    if args.out == '':
        args.out = f"{args.mode}.npy"

    with open(args.out, 'rb') as gains_file:
        raw_gains = np.load(gains_file)

    # Drop the trailing (correlation) axis of the gains
    jones = raw_gains[:, :, :, :, 0].astype(np.complex128)

    if fmt == "numpy":
        ant1, ant2 = ant1.compute(), ant2.compute()
        vis = vis.compute().astype(np.complex128)
        model = model.compute().astype(np.complex128)
        weight = weight.compute().astype(np.complex128)
    elif fmt == "dask":
        # One chunk spanning axes 0, 2 and 3; axis 1 is split into
        # unit-sized chunks.
        # NOTE(review): the original labelled axis 0 as the antenna axis;
        # confirm which axis is meant to be chunked per element.
        size0, _, size2, size3 = jones.shape
        jones = da.from_array(jones, chunks=(size0, 1, size2, size3))

    return (tbin_indices, tbin_counts, ant1, ant2,
            vis, model, weight, jones)
def test_residual_vis_dask(data_factory, corr_shape, jones_shape):
    """Check that the dask ``residual_vis`` matches the NumPy version.

    Noise-free data is produced by the ``data_factory`` fixture, the
    residual is evaluated with both implementations and the results are
    compared to 10 decimals. The test is skipped when ``dask.array`` is
    not importable.
    """
    da = pytest.importorskip("dask.array")
    from africanus.calibration.utils import residual_vis as residual_vis_np
    from africanus.calibration.utils.dask import residual_vis

    # Problem size and noise levels (sigma_n = 0 -> noise free)
    n_dir, n_time, n_chan, n_ant = 3, 32, 16, 4
    sigma_n, sigma_f = 0.0, 0.05

    sim = data_factory(sigma_n, sigma_f, n_time, n_chan, n_ant, n_dir,
                       corr_shape, jones_shape)
    vis = sim['DATA']
    ant1 = sim['ANTENNA1']
    ant2 = sim['ANTENNA2']
    model = sim['MODEL_DATA']
    jones = sim['JONES']  # what we need to compare to
    time = sim['TIME']
    flag = sim['FLAG']

    # Chunking scheme: split the unique times evenly over ncpu chunks
    ncpu = 8
    utimes_per_chunk = n_time // ncpu
    row_chunks, time_bin_idx, time_bin_counts = chunkify_rows(
        time, utimes_per_chunk)

    # Chunk layouts shared by several of the arrays below
    vis_chunks = (row_chunks, (n_chan, )) + (corr_shape)
    model_chunks = (row_chunks, (n_chan, ), (n_dir, )) + (corr_shape)
    jones_chunks = (utimes_per_chunk, n_ant, n_chan, n_dir) + jones_shape

    # Reference result from the NumPy implementation
    residual = residual_vis_np(time_bin_idx, time_bin_counts, ant1, ant2,
                               jones, vis, flag, model)

    # Same computation through the dask wrapper
    da_residual = residual_vis(
        da.from_array(time_bin_idx, chunks=(utimes_per_chunk)),
        da.from_array(time_bin_counts, chunks=(utimes_per_chunk)),
        da.from_array(ant1, chunks=row_chunks),
        da.from_array(ant2, chunks=row_chunks),
        da.from_array(jones, chunks=jones_chunks),
        da.from_array(vis, chunks=vis_chunks),
        da.from_array(flag, chunks=vis_chunks),
        da.from_array(model, chunks=model_chunks))
    residual2 = da_residual.compute()

    assert_array_almost_equal(residual, residual2, decimal=10)
def _uniform_chunk_step(starts):
    """Return the common spacing between consecutive entries of ``starts``.

    Only call this with at least two entries. The last difference is
    excluded whenever more than one difference exists (this mirrors the
    original slicing); with exactly two entries the single available
    difference is used instead of failing on an empty array.
    """
    steps = starts[1:] - starts[:-1]
    if len(steps) > 1:
        steps = steps[:-1]
    # Kept as an assert so the exception type seen by callers is unchanged
    assert (steps == steps[0]).all()
    return steps[0]


def _jones2col(**kw):
    """Apply the net gains in a gain table to a column and write the result.

    Keyword arguments are wrapped in an OmegaConf struct. The ones read here
    are ``ms`` (a path or a list of paths; only the first is used),
    ``gain_table``, ``acol``, ``compareto`` and ``mueller_column``.

    For every (gain dataset, MS dataset) pair the gains are applied with
    ``corrupt_vis`` to ``acol`` (or to an array of ones when ``acol`` is
    None) and the result is written to ``mueller_column`` of the first MS.
    When ``compareto`` is set, the maximum absolute difference with that
    column over unflagged data is printed for the first dataset and the
    interpreter exits via ``quit()`` without writing anything.

    Raises
    ------
    ValueError
        If the number of gain datasets differs from the number of MS
        datasets, or if the scan numbers of a pair do not match.
    """
    args = OmegaConf.create(kw)
    from omegaconf import ListConfig
    if not isinstance(args.ms, (list, ListConfig)):
        args.ms = [args.ms]
    OmegaConf.set_struct(args, True)

    import numpy as np
    from daskms.experimental.zarr import xds_from_zarr
    from daskms import xds_from_ms, xds_to_table
    import dask.array as da
    import dask
    from africanus.calibration.utils import chunkify_rows
    from africanus.calibration.utils.dask import corrupt_vis

    # get net gains
    G = xds_from_zarr(args.gain_table + '::G')

    # chunking info
    # presumably t_chunk/f_chunk hold chunk start indices, so their spacing
    # is the chunk size - TODO confirm against the gain-table writer
    t_chunks = G[0].t_chunk.data
    if len(t_chunks) > 1:
        utpc = _uniform_chunk_step(t_chunks)
    else:
        utpc = t_chunks[0]

    times = xds_from_ms(args.ms[0],
                        columns=['TIME'])[0].get('TIME').data.compute()
    row_chunks, tbin_idx, tbin_counts = chunkify_rows(
        times, utimes_per_chunk=utpc, daskify_idx=True)

    f_chunks = G[0].f_chunk.data
    if len(f_chunks) > 1:
        chan_chunks = _uniform_chunk_step(f_chunks)
    elif f_chunks[0]:
        chan_chunks = f_chunks[0]
    else:
        # a falsy single entry means "do not chunk the channel axis"
        chan_chunks = -1

    columns = ('DATA', 'FLAG', 'FLAG_ROW', 'ANTENNA1', 'ANTENNA2')
    if args.acol is not None:
        columns += (args.acol,)

    # open MS
    xds = xds_from_ms(args.ms[0],
                      chunks={'row': row_chunks, 'chan': chan_chunks},
                      columns=columns,
                      group_cols=('FIELD_ID', 'DATA_DESC_ID', 'SCAN_NUMBER'))

    # Current hack probably only works for single field and DDID.
    # Explicit check: an assert would be stripped when running with -O.
    if len(xds) != len(G):
        raise ValueError("Number of datasets in gains do not "
                         "match those in MS")

    # assuming scans are aligned
    out_data = []
    for g, ds in zip(G, xds):
        # Any failure while comparing (e.g. a missing attribute) is reported
        # as misalignment, exactly as the original try/assert did.
        try:
            aligned = bool(g.SCAN_NUMBER == ds.SCAN_NUMBER)
        except Exception as err:
            raise ValueError("Scans not aligned") from err
        if not aligned:
            raise ValueError("Scans not aligned")

        nrow = ds.dims['row']
        nchan = ds.dims['chan']
        ncorr = ds.dims['corr']

        # need to swap axes for africanus
        jones = da.swapaxes(g.gains.data, 1, 2)
        flag = ds.FLAG.data
        frow = ds.FLAG_ROW.data
        ant1 = ds.ANTENNA1.data
        ant2 = ds.ANTENNA2.data

        # flag auto-correlations, then combine the first and last
        # correlation flags with the row flags
        frow = (frow | (ant1 == ant2))
        flag = (flag[:, :, 0] | flag[:, :, -1])
        flag = da.logical_or(flag, frow[:, None])

        if args.acol is not None:
            acol = ds.get(args.acol).data.reshape(nrow, nchan, 1, ncorr)
        else:
            acol = da.ones((nrow, nchan, 1, ncorr),
                           chunks=(row_chunks, chan_chunks, 1, -1),
                           dtype=jones.dtype)

        cvis = corrupt_vis(tbin_idx, tbin_counts, ant1, ant2, jones, acol)

        # compare where unflagged
        if args.compareto is not None:
            flag = flag.compute()
            vis = ds.get(args.compareto).values[~flag]
            print("Max abs difference = ",
                  np.abs(cvis.compute()[~flag] - vis).max())
            # comparison mode stops here, before anything is written
            quit()

        out_ds = ds.assign(
            **{args.mueller_column: (("row", "chan", "corr"), cvis)})
        out_data.append(out_ds)

    writes = xds_to_table(out_data, args.ms[0],
                          columns=[args.mueller_column])
    dask.compute(writes)
def new(ms, sky_model, gains, **kwargs):
    """Generate per-source model visibilities and corrupted data for an MS.

    Model fluxes for Stokes I, Q, U and V are built from a Tigger sky model,
    turned into visibilities per source (one entry on the direction axis),
    converted to correlations, corrupted with the gains loaded from
    ``gains`` and written back to ``ms``.

    Parameters
    ----------
    ms : str
        Path of the measurement set that is read and written.
    sky_model : str
        Either a key of ``sky_models`` (looked up lower-cased) or a
        reference that is passed to ``Tigger.load`` unchanged.
    gains : str
        Path of a ``.npy`` file holding a 5-dimensional gains array that is
        unpacked as ``(n_time, n_ant, n_chan, n_dir, n_corr)``.
    **kwargs
        Options. ``kwargs["yaml"]`` must be present; when it is not None the
        options are loaded from that file instead of from ``kwargs``.
        Options read here: ``ncpu``, ``utime``, ``phase_convention``,
        ``die``, ``mname``, ``dname`` and ``std``.

    Raises
    ------
    ValueError
        On an unknown phase convention or feed type.
    AssertionError
        When the gains dimensions do not match the MS or the sky model.
    """
    # Options to attributed dictionary
    if kwargs["yaml"] is not None:
        options = ocf.load(kwargs["yaml"])
    else:
        options = ocf.create(kwargs)

    # Set to struct
    ocf.set_struct(options, True)

    # Change path to sky model if chosen. A failed lookup means the caller
    # supplied their own sky model reference, which is kept as is. Only
    # lookup failures are ignored, so KeyboardInterrupt/SystemExit are no
    # longer swallowed.
    try:
        sky_model = sky_models[sky_model.lower()]
    except (KeyError, AttributeError, TypeError):
        pass

    # Set thread count to cpu count
    if options.ncpu:
        from multiprocessing.pool import ThreadPool
        import dask
        dask.config.set(pool=ThreadPool(options.ncpu))
    else:
        import multiprocessing
        options.ncpu = multiprocessing.cpu_count()

    # Load gains to corrupt with
    with open(gains, "rb") as file:
        jones = np.load(file)

    # Load dimensions
    n_time, n_ant, n_chan, n_dir, n_corr = jones.shape
    n_row = n_time * (n_ant * (n_ant - 1) // 2)

    # Load ms
    MS = xds_from_ms(ms)[0]

    # Get time-bin indices and counts
    row_chunks, tbin_indices, tbin_counts = chunkify_rows(
        MS.TIME, options.utime)

    # Close and reopen with chunked rows
    MS.close()
    MS = xds_from_ms(ms, chunks={"row": row_chunks})[0]

    # Get antenna arrays (dask ignored for now)
    ant1 = MS.ANTENNA1.data
    ant2 = MS.ANTENNA2.data

    # Adjust UVW based on phase-convention
    if options.phase_convention.upper() == 'CASA':
        uvw = -MS.UVW.data.astype(np.float64)
    elif options.phase_convention.upper() == 'CODEX':
        uvw = MS.UVW.data.astype(np.float64)
    else:
        raise ValueError("Unknown sign convention for phase.")

    # MS dimensions
    dims = ocf.create(dict(MS.sizes))

    # Close MS
    MS.close()

    # Build source model from lsm
    lsm = Tigger.load(sky_model)

    # Check if dimensions match jones
    assert n_row == dims.row
    assert n_time == len(tbin_indices)
    assert n_ant == np.max((np.max(ant1), np.max(ant2))) + 1
    assert n_chan == dims.chan
    assert n_corr == dims.corr

    # If gains are DIE, the model still needs one entry per source
    if options.die:
        assert n_dir == 1
        n_dir = len(lsm.sources)
    else:
        assert n_dir == len(lsm.sources)

    # Get phase direction
    radec0_table = xds_from_table(ms + '::FIELD')[0]
    radec0 = radec0_table.PHASE_DIR.data.squeeze().compute()
    radec0_table.close()

    # Get frequency column
    freq_table = xds_from_table(ms + '::SPECTRAL_WINDOW')[0]
    freq = freq_table.CHAN_FREQ.data.astype(np.float64)[0]
    freq_table.close()

    # Get feed orientation
    feed_table = xds_from_table(ms + '::FEED')[0]
    feeds = feed_table.POLARIZATION_TYPE.data[0].compute()
    feed_table.close()

    # Create initial model array
    model = np.zeros((n_dir, n_chan, n_corr), dtype=np.float64)

    # Create initial coordinate array and source names
    lm = np.zeros((n_dir, 2), dtype=np.float64)
    source_names = []

    # Cycle coordinates creating a source with flux
    print("==> Building model visibilities")
    for d, source in enumerate(lsm.sources):
        # Extract name
        source_names.append(source.name)

        # Extract position
        radec_s = np.array([[source.pos.ra, source.pos.dec]])
        lm[d] = radec_to_lm(radec_s, radec0)

        # Spectrum (only spi currently supported); the same spectrum is
        # applied to every Stokes parameter of the source
        tmp_spec = source.spectrum
        spi = [tmp_spec.spi if tmp_spec is not None else 0.0]
        ref_freq = [tmp_spec.freq0 if tmp_spec is not None else 1.0]

        # Model flux for each non-zero Stokes parameter; I, Q, U, V map
        # to indices 0..3 of the last model axis
        for corr, stokes in enumerate("IQUV"):
            flux0 = getattr(source.flux, stokes)
            if flux0:
                model[d, :, corr] = flux0 * (freq / ref_freq)**spi

    # Close sky-model
    del lsm

    # Build dask graph
    tbin_indices = da.from_array(tbin_indices, chunks=(options.utime))
    tbin_counts = da.from_array(tbin_counts, chunks=(options.utime))
    lm = da.from_array(lm, chunks=lm.shape)
    model = da.from_array(model, chunks=model.shape)
    jones = da.from_array(jones, chunks=(options.utime, ) + jones.shape[1::])

    # Apply image to visibility for each source
    sources = []
    for s in range(n_dir):
        source_vis = im_to_vis(model[s].reshape((1, n_chan, n_corr)),
                               uvw,
                               lm[s].reshape((1, 2)),
                               freq,
                               dtype=np.complex64,
                               convention='fourier')
        sources.append(source_vis)
    model_vis = da.stack(sources, axis=2)

    # Sum over direction?
    if options.die:
        model_vis = da.sum(model_vis, axis=2, keepdims=True)
        n_dir = 1
        source_names = [options.mname]

    # Select schema based on feed orientation
    if (feeds == ["X", "Y"]).all():
        out_schema = [["XX", "XY"], ["YX", "YY"]]
    elif (feeds == ["R", "L"]).all():
        out_schema = [['RR', 'RL'], ['LR', 'LL']]
    else:
        raise ValueError("Unknown feed orientation implementation.")

    # Convert Stokes to Correlations
    in_schema = ['I', 'Q', 'U', 'V']
    model_vis = convert(model_vis, in_schema, out_schema).reshape(
        (n_row, n_chan, n_dir, n_corr))

    # Apply gains to model_vis
    print("==> Corrupting visibilities")
    data = corrupt_vis(tbin_indices, tbin_counts, ant1, ant2, jones,
                       model_vis)

    # Reopen MS
    MS = xds_from_ms(ms, chunks={"row": row_chunks})[0]

    # Assign model visibilities, one column per source name
    out_names = []
    for d in range(n_dir):
        MS = MS.assign(
            **{
                source_names[d]: (("row", "chan", "corr"),
                                  model_vis[:, :, d].astype(np.complex64))
            })
        out_names += [source_names[d]]

    # Assign noise free visibilities to 'CLEAN_DATA'
    MS = MS.assign(
        **{
            'CLEAN_' + options.dname: (("row", "chan", "corr"),
                                       data.astype(np.complex64))
        })
    out_names += ['CLEAN_' + options.dname]

    # Get noise realisation.
    # NOTE(review): the noisy column depends on `noise`, so it is only
    # written when std > 0 - confirm this matches the intended layout.
    if options.std > 0.0:
        # Noise matrix: complex noise on the two diagonal correlations
        print(f"==> Applying noise (std={options.std}) to visibilities")
        noise = []
        for _ in range(2):
            real = da.random.normal(loc=0.0,
                                    scale=options.std,
                                    size=(n_row, n_chan),
                                    chunks=(row_chunks, n_chan))
            imag = 1.0j * (da.random.normal(loc=0.0,
                                            scale=options.std,
                                            size=(n_row, n_chan),
                                            chunks=(row_chunks, n_chan)))
            noise.append(real + imag)

        # Zero matrix for off-diagonals
        zero = da.zeros((n_row, n_chan), chunks=(row_chunks, n_chan))
        noise.insert(1, zero)
        noise.insert(2, zero)

        # Stack along the correlation axis
        noise = da.stack(noise, axis=2).rechunk((row_chunks, n_chan, n_corr))

        # Assign noise to 'NOISE'
        MS = MS.assign(
            **{'NOISE': (("row", "chan", "corr"), noise.astype(np.complex64))})
        out_names += ['NOISE']

        # Add noise to data and assign to 'DATA'
        noisy_data = data + noise
        MS = MS.assign(
            **{
                options.dname: (("row", "chan", "corr"),
                                noisy_data.astype(np.complex64))
            })
        out_names += [options.dname]

    # Create a write to the table
    write = xds_to_table(MS, ms, out_names)

    # Submit all graph computations in parallel
    print(f"==> Executing `dask-ms` write to `{ms}` for the following "
          f"columns: {', '.join(out_names)}")
    with ProgressBar():
        write.compute()
    print("==> Completed.")
def impl(sigma_n, sigma_f, n_time, n_chan, n_ant, n_dir, corr_shape,
         jones_shape, phase_only_gains=False):
    """Simulate a small calibration data set and return it as a dict.

    Parameters
    ----------
    sigma_n : float
        Standard deviation of the complex noise added to the visibilities;
        a falsy value means no noise and unit weights.
    sigma_f : float
        Standard deviation of the gain perturbations; a falsy value leaves
        the gains at one.
    n_time, n_chan, n_ant, n_dir : int
        Number of times, channels, antennas and directions.
    corr_shape, jones_shape : tuple of int
        Trailing correlation shape of the model data and of the gains.
    phase_only_gains : bool
        When True the gains are ``exp(1j * phase)`` with normally
        distributed phases, otherwise complex scatter is added to one.

    Returns
    -------
    dict
        Keys ``DATA``, ``MODEL_DATA``, ``WEIGHT_SPECTRUM``, ``TIME``,
        ``ANTENNA1``, ``ANTENNA2``, ``FLAG`` and ``JONES``.
    """
    rs = np.random.RandomState(42)
    n_bl = n_ant * (n_ant - 1) // 2
    n_row = n_bl * n_time

    # make aux data
    antenna1 = np.zeros(n_row, dtype=np.int16)
    antenna2 = np.zeros(n_row, dtype=np.int16)
    time = np.zeros(n_row, dtype=np.float64)
    # The original drew uvw from the unseeded global RNG, which made the
    # fixture differ between runs. A dedicated seeded generator is used so
    # that the draws from `rs` below are unchanged.
    uvw = np.random.RandomState(4242).randn(n_row, 3)
    time_values = np.linspace(0, 1, n_time)
    freq = np.linspace(1e9, 2e9, n_chan)
    for i in range(n_time):
        row = 0
        for p in range(n_ant):
            for q in range(p):
                time[i * n_bl + row] = time_values[i]
                antenna1[i * n_bl + row] = p
                antenna2[i * n_bl + row] = q
                row += 1
    assert time.size == n_row

    # simulate visibilities
    model_data = np.zeros((n_row, n_chan, n_dir) + corr_shape,
                          dtype=np.complex128)
    # make up some sources
    lm = lm_factory(n_dir, rs)
    alpha = -0.7
    freq0 = freq[n_chan // 2]
    flux = flux_factory(n_dir, n_chan, corr_shape, alpha, freq, freq0, rs)

    # simulate model data, one direction at a time
    for d in range(n_dir):
        dir_lm = lm[d].reshape(1, 2)
        # Get flux for source (keep source axis, flatten cor axis)
        dir_flux = flux[d].reshape(1, n_chan, np.prod(corr_shape))
        tmp = im_to_vis(dir_flux, uvw, dir_lm, freq)
        model_data[:, :, d] = tmp.reshape((n_row, n_chan) + corr_shape)
    assert not np.isnan(model_data).any()

    # simulate gains (just randomly scattered around 1 for now)
    jones = np.ones((n_time, n_ant, n_chan, n_dir) + jones_shape,
                    dtype=np.complex128)
    if sigma_f:
        if phase_only_gains:
            jones = np.exp(
                1.0j * rs.normal(loc=0.0, scale=sigma_f, size=jones.shape))
        else:
            jones += (
                rs.normal(loc=0.0, scale=sigma_f, size=jones.shape) +
                1.0j * rs.normal(loc=0.0, scale=sigma_f, size=jones.shape))
        assert (np.abs(jones) > 1e-5).all()
        assert not np.isnan(jones).any()

    # get vis
    _, time_bin_indices, time_bin_counts = chunkify_rows(time, n_time)
    vis = corrupt_vis(time_bin_indices, time_bin_counts, antenna1, antenna2,
                      jones, model_data)
    assert not np.isnan(vis).any()

    # add noise
    if sigma_n:
        vis += (rs.normal(loc=0.0, scale=sigma_n, size=vis.shape) +
                1.0j * rs.normal(loc=0.0, scale=sigma_n, size=vis.shape))
    weights = np.ones(vis.shape, dtype=np.float64)
    if sigma_n:
        weights /= sigma_n**2

    # builtin bool: the np.bool alias was removed from NumPy
    flag = np.zeros(vis.shape, dtype=bool)

    data_dict = {
        "DATA": vis,
        "MODEL_DATA": model_data,
        "WEIGHT_SPECTRUM": weights,
        "TIME": time,
        "ANTENNA1": antenna1,
        "ANTENNA2": antenna2,
        "FLAG": flag,
        "JONES": jones,
    }
    return data_dict
def simulate(args):
    """Simulate gain-corrupted Stokes I visibilities and write them to an MS.

    Reads ``args.ms``, ``args.utimes_per_chunk``, ``args.sky_model`` and
    ``args.out_col``. The sky model provides source positions, Stokes I
    fluxes and spectral indices; ``make_screen`` provides the gains. The
    corrupted visibilities are written to column ``args.out_col``.

    Returns
    -------
    tuple
        ``(jones, alphas)``: the gains cast to complex128 and the second
        value returned by ``make_screen``.

    Raises
    ------
    RuntimeError
        If the FLAG column does not have 1, 2 or 4 correlations.
    """
    # full time column -> row chunks and time-bin bookkeeping
    ms = table(args.ms)
    time = ms.getcol('TIME')
    row_chunks, tbin_idx, tbin_counts = chunkify_rows(
        time, args.utimes_per_chunk)
    tbin_idx = da.from_array(tbin_idx, chunks=(args.utimes_per_chunk))
    tbin_counts = da.from_array(tbin_counts, chunks=(args.utimes_per_chunk))
    n_time = tbin_idx.size

    ant1 = ms.getcol('ANTENNA1')
    ant2 = ms.getcol('ANTENNA2')
    n_ant = np.maximum(ant1.max(), ant2.max()) + 1

    # correlation layout of the model and of the gains per correlation count
    flag = ms.getcol("FLAG")
    n_row, n_freq, n_corr = flag.shape
    corr_layouts = {
        4: ((2, 2), (2, )),
        2: ((2, ), (2, )),
        1: ((1, ), (1, )),
    }
    if n_corr not in corr_layouts:
        raise RuntimeError("Invalid number of correlations")
    model_corr, jones_corr = corr_layouts[n_corr]
    ms.close()

    # phase direction and channel frequencies
    radec0 = table(args.ms + '::FIELD').getcol('PHASE_DIR').squeeze()
    freq = table(args.ms + '::SPECTRAL_WINDOW').getcol('CHAN_FREQ')[0].astype(
        np.float64)
    assert freq.size == n_freq

    # source coordinates, fluxes and spectra from the lsm
    lsm = Tigger.load(args.sky_model)
    records = [([source.pos.ra, source.pos.dec], [source.flux.I],
                source.spectrum) for source in lsm.sources]
    n_dir = len(records)
    radec = np.asarray([position for position, _, _ in records])
    lm = radec_to_lm(radec, radec0)

    # build the per-frequency, per-direction model
    model = np.zeros((n_freq, n_dir) + model_corr)
    stokes = np.asarray([flux for _, flux, _ in records])
    # sources without a spectrum get a flat one (spi 0, reference 1)
    ref_freqs = np.asarray([[spec.freq0 if spec is not None else 1.0]
                            for _, _, spec in records])
    spi = np.asarray([[spec.spi if spec is not None else 0.0]
                      for _, _, spec in records])
    for d in range(n_dir):
        Stokes_I = stokes[d] * (freq / ref_freqs[d])**spi[d]
        if n_corr == 4:
            # Stokes I goes on the diagonal only
            model[:, d, 0, 0] = Stokes_I
            model[:, d, 1, 1] = Stokes_I
        elif n_corr == 2:
            model[:, d, 0] = Stokes_I
            model[:, d, 1] = Stokes_I
        else:
            model[:, d, 0] = Stokes_I

    # columns needed from the MS
    cols = ['ANTENNA1', 'ANTENNA2', 'UVW']

    # gains
    jones, alphas = make_screen(lm, freq, n_time, n_ant, jones_corr[0])
    jones = jones.astype(np.complex128)
    jones_da = da.from_array(jones,
                             chunks=(args.utimes_per_chunk, ) +
                             jones.shape[1:])

    freqs = da.from_array(freq, chunks=(n_freq))
    lm = da.from_array(np.tile(lm[None], (n_time, 1, 1)),
                       chunks=(args.utimes_per_chunk, n_dir, 2))

    # repeat the model along a new leading time axis
    tile_reps = (n_time, ) + (1, ) * len(model.shape)
    model_chunks = (args.utimes_per_chunk, ) + model.shape
    model = da.from_array(np.tile(model[None], tile_reps),
                          chunks=model_chunks)

    # load data in chunks and apply gains to each chunk
    xds = xds_from_ms(args.ms, columns=cols, chunks={"row": row_chunks})[0]
    data = compute_and_corrupt_vis(tbin_idx, tbin_counts, xds.ANTENNA1.data,
                                   xds.ANTENNA2.data, jones_da, model,
                                   xds.UVW.data, freqs, lm)

    # assign visibilities to args.out_col and write to ms
    corrupted = data.reshape(n_row, n_freq, n_corr)
    xds = xds.assign(**{args.out_col: (("row", "chan", "corr"), corrupted)})
    write = xds_to_table(xds, args.ms, [args.out_col])

    # submit all graph computations in parallel
    with ProgressBar():
        write.compute()

    return jones, alphas
def calibrate(args, jones, alphas):
    """Run a simple calibration to check that a simulation went as expected.

    Note: do not run on a large data set; all columns are read into memory.

    Reads ``args.ms``, ``args.utimes_per_chunk``, ``args.out_col`` and
    ``args.sky_model``. Solves for diagonal gains with ``gauss_newton`` and
    compares the phase of ``g_p * conj(g_q)`` between the true and solved
    gains. A mismatch is printed, not raised.

    Parameters
    ----------
    args : namespace-like
        See above.
    jones : array
        True gains to compare against.
    alphas
        Unused here; kept so the signature matches the caller.
    """
    # load data, making sure the table is closed even if a read fails
    ms = table(args.ms)
    try:
        time = ms.getcol('TIME')
        ant1 = ms.getcol('ANTENNA1')
        ant2 = ms.getcol('ANTENNA2')
        uvw = ms.getcol('UVW').astype(np.float64)
        data = ms.getcol(args.out_col)  # this is where we put the data
        flag = ms.getcol('FLAG')
    finally:
        ms.close()

    _, tbin_idx, tbin_counts = chunkify_rows(time, args.utimes_per_chunk)
    n_time = tbin_idx.size
    n_ant = np.maximum(ant1.max(), ant2.max()) + 1

    # we know it is pure Stokes I so we can solve using diagonals only
    data = data[:, :, (0, 3)].astype(np.complex128)
    n_row, n_freq, n_corr = data.shape
    flag = flag[:, :, (0, 3)]

    # get phase dir
    radec0 = table(args.ms + '::FIELD').getcol('PHASE_DIR').squeeze().astype(
        np.float64)

    # get freqs
    freq = table(args.ms + '::SPECTRAL_WINDOW').getcol('CHAN_FREQ')[0].astype(
        np.float64)
    assert freq.size == n_freq

    # now get the model
    # get source coordinates from lsm
    lsm = Tigger.load(args.sky_model)
    radec = []
    stokes = []
    spi = []
    ref_freqs = []
    for source in lsm.sources:
        radec.append([source.pos.ra, source.pos.dec])
        stokes.append([source.flux.I])
        tmp_spec = source.spectrum
        # sources without a spectrum get a flat one
        spi.append([tmp_spec.spi if tmp_spec is not None else 0.0])
        ref_freqs.append([tmp_spec.freq0 if tmp_spec is not None else 1.0])

    n_dir = len(stokes)
    radec = np.asarray(radec)
    lm = radec_to_lm(radec, radec0)

    # get model visibilities
    # complex128: the np.complex alias was removed from NumPy
    model = np.zeros((n_row, n_freq, n_dir, 2), dtype=np.complex128)
    stokes = np.asarray(stokes)
    ref_freqs = np.asarray(ref_freqs)
    spi = np.asarray(spi)
    for d in range(n_dir):
        Stokes_I = stokes[d] * (freq / ref_freqs[d])**spi[d]
        model[:, :, d, 0:1] = im_to_vis(Stokes_I[None, :, None], uvw,
                                        lm[d:d + 1], freq)
        # both diagonal correlations carry the same Stokes I visibility
        model[:, :, d, 1] = model[:, :, d, 0]

    # set weights to unity
    weight = np.ones_like(data, dtype=np.float64)

    # initialise gains
    jones0 = np.ones((n_time, n_ant, n_freq, n_dir, n_corr),
                     dtype=np.complex128)

    # calibrate
    ti = timeit()
    jones_hat, jhj, jhr, k = gauss_newton(tbin_idx,
                                          tbin_counts,
                                          ant1,
                                          ant2,
                                          jones0,
                                          data,
                                          flag,
                                          model,
                                          weight,
                                          tol=1e-5,
                                          maxiter=100)
    print("%i iterations took %fs" % (k, timeit() - ti))

    # verify result: compare phase differences for index pairs (p, q), q < p
    for p in range(2):
        for q in range(p):
            diff_true = np.angle(jones[:, p] * jones[:, q].conj())
            diff_hat = np.angle(jones_hat[:, p] * jones_hat[:, q].conj())
            try:
                assert_array_almost_equal(diff_true, diff_hat, decimal=2)
            except Exception as e:
                # report the mismatch without aborting the run
                print(e)
# Parse the command line
args = create_parser().parse_args()

# Use the requested thread count, otherwise one thread per CPU
if not args.ncpu:
    import multiprocessing
    ncpu = multiprocessing.cpu_count()
else:
    ncpu = args.ncpu
    from multiprocessing.pool import ThreadPool
    import dask
    dask.config.set(pool=ThreadPool(ncpu))

print("Using %i threads" % ncpu)

# Full time column -> row chunks plus time-bin indices and counts
time = table(args.ms).getcol('TIME')
row_chunks, tbin_idx, tbin_counts = chunkify_rows(
    time, args.utimes_per_chunk)

# Time-bin bookkeeping as dask arrays, chunked like the unique times
tbin_idx = da.from_array(tbin_idx, chunks=(args.utimes_per_chunk))
tbin_counts = da.from_array(tbin_counts, chunks=(args.utimes_per_chunk))

# One direction per comma-separated model column name
model_cols = args.model_cols.split(',')
n_dir = len(model_cols)

# Columns to read: antennas, data, then every model column
cols = ['ANTENNA1', 'ANTENNA2', args.data_col]
for col in model_cols:
    cols.append(col)
def test_forwardmodel(do_beam, do_gains, tmp_path_factory):
    """End-to-end check that the forward worker recovers a point-source model.

    A test measurement set is fetched with packratt, model visibilities for
    ten random point sources are written to it (optionally attenuated by a
    primary beam and corrupted by smooth random gains), the dirty image and
    PSF are made, and the model inferred by ``_forward`` is compared with
    the input model at the source positions (``atol=1e-4``).

    Parameters
    ----------
    do_beam : bool
        Attenuate the model with the katbeam ``JimBeam`` primary beam.
    do_gains : bool
        Corrupt the visibilities with gains and write a Mueller column.
    tmp_path_factory
        pytest fixture providing the working directory.
    """
    test_dir = tmp_path_factory.mktemp("test_pfb")

    packratt.get('/test/ms/2021-06-24/elwood/test_ascii_1h60.0s.MS.tar',
                 str(test_dir))

    import numpy as np
    np.random.seed(420)
    from numpy.testing import assert_allclose
    from pyrap.tables import table

    ms = table(str(test_dir / 'test_ascii_1h60.0s.MS'), readonly=False)
    spw = table(str(test_dir / 'test_ascii_1h60.0s.MS::SPECTRAL_WINDOW'))

    utime = np.unique(ms.getcol('TIME'))

    freq = spw.getcol('CHAN_FREQ').squeeze()
    # the spectral window table is only needed for the frequencies
    spw.close()
    freq0 = np.mean(freq)

    ntime = utime.size
    nchan = freq.size
    nant = np.maximum(
        ms.getcol('ANTENNA1').max(),
        ms.getcol('ANTENNA2').max()) + 1

    ncorr = ms.getcol('FLAG').shape[-1]

    uvw = ms.getcol('UVW')
    nrow = uvw.shape[0]
    u_max = abs(uvw[:, 0]).max()
    v_max = abs(uvw[:, 1]).max()
    uv_max = np.maximum(u_max, v_max)

    # image size
    from africanus.constants import c as lightspeed
    cell_N = 1.0 / (2 * uv_max * freq.max() / lightspeed)

    srf = 2.0
    cell_rad = cell_N / srf
    # radians -> degrees (the original message mislabelled this unit)
    cell_size = cell_rad * 180 / np.pi
    print("Cell size set to %5.5e degrees" % cell_size)

    fov = 2
    npix = int(fov / cell_size)
    if npix % 2:
        npix += 1
    nx = npix
    ny = npix

    print("Image size set to (%i, %i, %i)" % (nchan, nx, ny))

    # model: point sources with random positions, fluxes and spectra
    model = np.zeros((nchan, nx, ny), dtype=np.float64)
    nsource = 10
    Ix = np.random.randint(0, npix, nsource)
    Iy = np.random.randint(0, npix, nsource)
    alpha = -0.7 + 0.1 * np.random.randn(nsource)
    I0 = 1.0 + np.abs(np.random.randn(nsource))
    for i in range(nsource):
        model[:, Ix[i], Iy[i]] = I0[i] * (freq / freq0)**alpha[i]

    if do_beam:
        # primary beam
        from katbeam import JimBeam
        beam = JimBeam('MKAT-AA-L-JIM-2020')
        l_coord = -np.arange(-(nx // 2), nx // 2) * cell_size
        m_coord = np.arange(-(ny // 2), ny // 2) * cell_size
        xx, yy = np.meshgrid(l_coord, m_coord, indexing='ij')
        pbeam = np.zeros((nchan, nx, ny), dtype=np.float64)
        for i in range(nchan):
            pbeam[i] = beam.I(xx, yy, freq[i] / 1e6)  # freq in MHz
        model_att = pbeam * model
        bm = 'JimBeam'
    else:
        model_att = model
        bm = None

    # model vis
    from ducc0.wgridder import dirty2ms
    model_vis = np.zeros((nrow, nchan, ncorr), dtype=np.complex128)
    for c in range(nchan):
        model_vis[:, c:c + 1, 0] = dirty2ms(uvw,
                                            freq[c:c + 1],
                                            model_att[c],
                                            pixsize_x=cell_rad,
                                            pixsize_y=cell_rad,
                                            epsilon=1e-8,
                                            do_wstacking=True,
                                            nthreads=8)
        # copy the first correlation into the last one
        model_vis[:, c, -1] = model_vis[:, c, 0]

    ms.putcol('MODEL_DATA', model_vis.astype(np.complex64))

    if do_gains:
        t = (utime - utime.min()) / (utime.max() - utime.min())
        nu = 2.5 * (freq / freq0 - 1.0)

        # squared-exponential covariances in time and frequency; their
        # Cholesky factors are used to draw smooth random gains
        from africanus.gps.utils import abs_diff
        tt = abs_diff(t, t)
        lt = 0.25
        Kt = 0.1 * np.exp(-tt**2 / (2 * lt**2))
        Lt = np.linalg.cholesky(Kt + 1e-10 * np.eye(ntime))
        vv = abs_diff(nu, nu)
        lv = 0.1
        Kv = 0.1 * np.exp(-vv**2 / (2 * lv**2))
        Lv = np.linalg.cholesky(Kv + 1e-10 * np.eye(nchan))
        L = (Lt, Lv)

        from pfb.utils.misc import kron_matvec

        jones = np.zeros((ntime, nant, nchan, 1, ncorr), dtype=np.complex128)
        for p in range(nant):
            for c in [0, -1]:  # for now only diagonal
                xi_amp = np.random.randn(ntime, nchan)
                amp = np.exp(-nu[None, :]**2 +
                             kron_matvec(L, xi_amp).reshape(ntime, nchan))
                xi_phase = np.random.randn(ntime, nchan)
                phase = kron_matvec(L, xi_phase).reshape(ntime, nchan)
                jones[:, p, :, 0, c] = amp * np.exp(1.0j * phase)

        # corrupted vis
        model_vis = model_vis.reshape(nrow, nchan, 1, 2, 2)
        from africanus.calibration.utils import chunkify_rows
        time = ms.getcol('TIME')
        row_chunks, tbin_idx, tbin_counts = chunkify_rows(time, ntime)
        ant1 = ms.getcol('ANTENNA1')
        ant2 = ms.getcol('ANTENNA2')

        from africanus.calibration.utils import corrupt_vis
        vis = corrupt_vis(tbin_idx, tbin_counts, ant1, ant2, jones,
                          model_vis).reshape(nrow, nchan, ncorr)

        # corrupting a unit model gives the Mueller column
        model_vis[:, :, 0, 0, 0] = 1.0 + 0j
        model_vis[:, :, 0, -1, -1] = 1.0 + 0j
        muellercol = corrupt_vis(tbin_idx, tbin_counts, ant1, ant2, jones,
                                 model_vis).reshape(nrow, nchan, ncorr)

        ms.putcol('DATA', vis.astype(np.complex64))
        ms.putcol('CORRECTED_DATA', muellercol.astype(np.complex64))
        mcol = 'CORRECTED_DATA'
    else:
        ms.putcol('DATA', model_vis.astype(np.complex64))
        mcol = None

    # Close the table on both paths (the original closed it only when
    # do_gains was set) so the workers below never open the MS while this
    # handle is still open.
    ms.close()

    from pfb.workers.grid.dirty import _dirty
    _dirty(ms=str(test_dir / 'test_ascii_1h60.0s.MS'),
           data_column="DATA",
           weight_column='WEIGHT',
           imaging_weight_column=None,
           flag_column='FLAG',
           mueller_column=mcol,
           row_chunks=None,
           epsilon=1e-5,
           wstack=True,
           mock=False,
           double_accum=True,
           output_filename=str(test_dir / 'test'),
           nband=nchan,
           field_of_view=fov,
           super_resolution_factor=srf,
           cell_size=None,
           nx=None,
           ny=None,
           output_type='f4',
           nworkers=1,
           nthreads_per_worker=1,
           nvthreads=8,
           mem_limit=8,
           nthreads=8,
           host_address=None)

    from pfb.workers.grid.psf import _psf
    _psf(ms=str(test_dir / 'test_ascii_1h60.0s.MS'),
         data_column="DATA",
         weight_column='WEIGHT',
         imaging_weight_column=None,
         flag_column='FLAG',
         mueller_column=mcol,
         row_out_chunk=-1,
         row_chunks=None,
         epsilon=1e-5,
         wstack=True,
         mock=False,
         psf_oversize=2,
         double_accum=True,
         output_filename=str(test_dir / 'test'),
         nband=nchan,
         field_of_view=fov,
         super_resolution_factor=srf,
         cell_size=None,
         nx=None,
         ny=None,
         output_type='f4',
         nworkers=1,
         nthreads_per_worker=1,
         nvthreads=8,
         mem_limit=8,
         nthreads=8,
         host_address=None)

    # solve for model using pcg and mask
    mask = np.any(model, axis=0)
    from astropy.io import fits
    from pfb.utils.fits import save_fits
    hdr = fits.getheader(str(test_dir / 'test_dirty.fits'))
    save_fits(str(test_dir / 'test_model.fits'), model, hdr)
    save_fits(str(test_dir / 'test_mask.fits'), mask, hdr)

    from pfb.workers.deconv.forward import _forward
    _forward(residual=str(test_dir / 'test_dirty.fits'),
             psf=str(test_dir / 'test_psf.fits'),
             mask=str(test_dir / 'test_mask.fits'),
             beam_model=bm,
             band='L',
             weight_table=str(test_dir / 'test.zarr'),
             output_filename=str(test_dir / 'test'),
             nband=nchan,
             output_type='f4',
             epsilon=1e-5,
             sigmainv=0.0,
             wstack=True,
             double_accum=True,
             cg_tol=1e-6,
             cg_minit=10,
             cg_maxit=100,
             cg_verbose=0,
             cg_report_freq=10,
             backtrack=False,
             nworkers=1,
             nthreads_per_worker=1,
             nvthreads=1,
             mem_limit=8,
             nthreads=1,
             host_address=None)

    # get inferred model
    from pfb.utils.fits import load_fits
    model_inferred = load_fits(str(test_dir / 'test_update.fits')).squeeze()

    for i in range(nsource):
        if do_beam:
            # weight the difference by the beam at the source position
            beam = pbeam[:, Ix[i], Iy[i]]
            assert_allclose(
                0.0,
                beam *
                (model_inferred[:, Ix[i], Iy[i]] - model[:, Ix[i], Iy[i]]),
                atol=1e-4)
        else:
            assert_allclose(0.0,
                            model_inferred[:, Ix[i], Iy[i]] -
                            model[:, Ix[i], Iy[i]],
                            atol=1e-4)
def both(args):
    """Generate model data, corrupted visibilities and gains.

    Builds a per-source Stokes model from one of the bundled sky-models,
    generates phase-only or normal gains, saves the gains to ``args.out``
    and writes the following columns back to the measurement set
    ``args.ms``: one column per source (named after the source) holding
    its model visibilities, ``CLEAN_DATA`` holding the gain-corrupted
    visibilities and, only when ``args.sigma_n > 0``, ``NOISE`` and
    ``DATA`` (``CLEAN_DATA`` plus noise).

    Parameters
    ----------
    args : namespace
        Attributes read here: ``ms``, ``ncpu``, ``utimes_per_chunk``,
        ``phase_convention`` ('CASA' or 'CODEX'), ``sky_model``
        ('MODEL-1.txt', 'MODEL-4.txt' or 'MODEL-50.txt'), ``mode``
        ('phase' or 'normal'), ``alpha_std``, ``sigma_f``, ``lt``,
        ``lnu``, ``ls``, ``sigma_n`` and ``out``. ``ncpu``,
        ``sky_model`` and ``out`` may be overwritten in place.

    Raises
    ------
    ValueError
        For an unknown phase convention, sky-model name or gains mode.
    NotImplementedError
        If the FLAG column does not have exactly 4 correlations.
    AssertionError
        If the frequency axis length differs from the FLAG channel axis.
    """

    # Set thread count to cpu count
    if args.ncpu:
        from multiprocessing.pool import ThreadPool
        import dask
        dask.config.set(pool=ThreadPool(args.ncpu))
    else:
        import multiprocessing
        args.ncpu = multiprocessing.cpu_count()

    # Get full time column and compute row chunks
    ms = xds_from_table(args.ms)[0]
    row_chunks, tbin_idx, tbin_counts = chunkify_rows(
        ms.TIME, args.utimes_per_chunk)

    # Convert time rows to dask arrays
    tbin_idx = da.from_array(tbin_idx, chunks=(args.utimes_per_chunk))
    tbin_counts = da.from_array(tbin_counts, chunks=(args.utimes_per_chunk))

    # Time axis
    n_time = tbin_idx.size

    # Get antenna columns
    ant1 = ms.ANTENNA1.data
    ant2 = ms.ANTENNA2.data

    # No. of antennas axis
    n_ant = (np.maximum(ant1.max(), ant2.max()) + 1).compute()

    # Get flag column
    flag = ms.FLAG.data

    # Reject an unknown convention up front, before any file is written.
    # The UVW column itself is only needed later, from the chunked dataset.
    if args.phase_convention not in ('CASA', 'CODEX'):
        raise ValueError("Unknown sign convention for phase")

    # Get rest of dimensions
    n_row, n_freq, n_corr = flag.shape

    # Raise error if correlation axis too small
    if n_corr != 4:
        raise NotImplementedError("Only 4 correlations currently supported")

    # Get phase direction
    radec0_table = xds_from_table(args.ms+'::FIELD')[0]
    radec0 = radec0_table.PHASE_DIR.data.squeeze().compute()

    # Get frequency column
    freq_table = xds_from_table(args.ms+'::SPECTRAL_WINDOW')[0]
    freq = freq_table.CHAN_FREQ.data.astype(np.float64)[0]

    # Check dimension
    assert freq.size == n_freq

    # Check for sky-model
    if args.sky_model == 'MODEL-1.txt':
        args.sky_model = MODEL_1
    elif args.sky_model == 'MODEL-4.txt':
        args.sky_model = MODEL_4
    elif args.sky_model == 'MODEL-50.txt':
        args.sky_model = MODEL_50
    else:
        # `NotImplemented` is a constant, not an exception class; raise the
        # same ValueError the jones-only entry point uses for this case.
        raise ValueError(f"Sky-model {args.sky_model} not in "
                         "kalcal/datasets/sky_model/")

    # Build source model from lsm
    lsm = Tigger.load(args.sky_model)

    # Direction axis
    n_dir = len(lsm.sources)

    # Frequencies as NumPy, materialised once and reused for both the
    # model fluxes and the gain generation below
    nu = freq.compute()

    # Create initial model array
    model = np.zeros((n_dir, n_freq, n_corr), dtype=np.float64)

    # Create initial coordinate array and source names
    lm = np.zeros((n_dir, 2), dtype=np.float64)
    source_names = []

    # Cycle coordinates creating a source with flux
    for d, source in enumerate(lsm.sources):
        # Extract name
        source_names.append(source.name)

        # Extract position
        radec_s = np.array([[source.pos.ra, source.pos.dec]])
        lm[d] = radec_to_lm(radec_s, radec0)

        # Stokes I, Q, U, V fill correlation slots 0..3; a falsy flux
        # leaves its slot at zero
        for corr, stokes in enumerate(('I', 'Q', 'U', 'V')):
            flux0 = getattr(source.flux, stokes)
            if not flux0:
                continue

            # Get spectrum (only spi currently supported)
            tmp_spec = source.spectrum
            spi = [tmp_spec.spi if tmp_spec is not None else 0.0]
            ref_freq = [tmp_spec.freq0 if tmp_spec is not None else 1.0]

            # Generate model flux
            model[d, :, corr] = flux0 * (nu/ref_freq)**spi

    # Dask to NP
    t = tbin_idx.compute()

    # Generate gains
    print('==> Both-mode')
    if args.mode == "phase":
        gains = phase_gains(lm, nu, n_time, n_ant, args.alpha_std)
    elif args.mode == "normal":
        gains = normal_gains(t, nu, lm, n_ant, n_corr,
                             args.sigma_f, args.lt, args.lnu, args.ls)
    else:
        raise ValueError("Only normal and phase modes available.")
    print()

    # Reduce jones to diagonals only
    gains = gains[:, :, :, :, (0, -1)]

    # Jones to complex
    gains = gains.astype(np.complex128)

    # Generate filename
    if args.out == "":
        args.out = f"{args.mode}.npy"

    # Save gains and settings to file
    with open(args.out, 'wb') as file:
        np.save(file, gains)

    # Build dask graph
    lm = da.from_array(lm, chunks=lm.shape)
    model = da.from_array(model, chunks=model.shape)
    jones_da = da.from_array(
        gains, chunks=(args.utimes_per_chunk,) + gains.shape[1:])

    # Columns needed to apply the gains
    cols = ['ANTENNA1', 'ANTENNA2', 'UVW']

    # Load data in in chunks and apply gains to each chunk
    xds = xds_from_ms(args.ms, columns=cols,
                      chunks={"row": row_chunks})[0]
    ant1 = xds.ANTENNA1.data
    ant2 = xds.ANTENNA2.data

    # Adjust UVW based on phase-convention (validated above): CASA flips
    # the sign, CODEX uses the column as stored
    uvw = xds.UVW.data.astype(np.float64)
    if args.phase_convention == 'CASA':
        uvw = -uvw

    # Get model visibilities
    model_vis = np.zeros((n_row, n_freq, n_dir, n_corr),
                         dtype=np.complex128)
    for s in range(n_dir):
        model_vis[:, :, s] = im_to_vis(
            model[s].reshape((1, n_freq, n_corr)),
            uvw,
            lm[s].reshape((1, 2)),
            freq, dtype=np.complex64, convention='fourier')

    # NP to Dask
    model_vis = da.from_array(model_vis,
                              chunks=(row_chunks, n_freq, n_dir, n_corr))

    # Convert Stokes to corr
    in_schema = ['I', 'Q', 'U', 'V']
    out_schema = [['RR', 'RL'], ['LR', 'LL']]
    model_vis = convert(model_vis, in_schema, out_schema)

    # Apply gains
    data = corrupt_vis(tbin_idx, tbin_counts, ant1, ant2,
                       jones_da, model_vis).reshape(
                           (n_row, n_freq, n_corr))

    # Assign model visibilities, one column per source
    out_names = []
    for d, name in enumerate(source_names):
        source_vis = model_vis[:, :, d].reshape(
            n_row, n_freq, n_corr).astype(np.complex64)
        xds = xds.assign(**{name: (("row", "chan", "corr"), source_vis)})
        out_names.append(name)

    # Assign noise free visibilities to 'CLEAN_DATA'
    xds = xds.assign(**{'CLEAN_DATA': (("row", "chan", "corr"),
                                       data.astype(np.complex64))})
    out_names.append('CLEAN_DATA')

    # Get noise realisation
    if args.sigma_n > 0.0:
        noise_shape = (n_row, n_freq, n_corr)
        noise_chunks = (row_chunks, n_freq, n_corr)

        # Noise matrix
        noise = (da.random.normal(loc=0.0, scale=args.sigma_n,
                                  size=noise_shape, chunks=noise_chunks)
                 + 1.0j*da.random.normal(loc=0.0, scale=args.sigma_n,
                                         size=noise_shape,
                                         chunks=noise_chunks))/np.sqrt(2.0)

        # Dask to NP, so the same realisation backs both NOISE and DATA
        noise = noise.compute()

        # Remove noise on off-diagonals
        noise[:, :, 1:3] = 0.0

        # NP to Dask
        noise = da.from_array(noise, chunks=noise_chunks)

        # Assign noise to 'NOISE'
        xds = xds.assign(**{'NOISE': (("row", "chan", "corr"),
                                      noise.astype(np.complex64))})
        out_names.append('NOISE')

        # Add noise to data and assign to 'DATA'
        noisy_data = data + noise
        xds = xds.assign(**{'DATA': (("row", "chan", "corr"),
                                     noisy_data.astype(np.complex64))})
        out_names.append('DATA')

    # Create a write to the table
    write = xds_to_table(xds, args.ms, out_names)

    # Submit all graph computations in parallel
    with ProgressBar():
        write.compute()

    print(f"==> Applied Jones to MS: {args.ms} <--> {args.out}")
def jones(args):
    """Generate jones matrix only, but based off of a measurement set.

    Reads the time, antenna, flag, phase-direction and frequency
    information from ``args.ms``, converts the positions of the sources
    in the selected bundled sky-model with ``radec_to_lm``, generates
    phase-only or normal gains and saves their first and last
    correlation entries to ``args.out`` as a complex128 ``.npy`` file.
    The measurement set itself is not written to here.

    Parameters
    ----------
    args : namespace
        Attributes read here: ``ms``, ``ncpu``, ``utimes_per_chunk``,
        ``phase_convention`` ('CASA' or 'CODEX'), ``sky_model``
        ('MODEL-1.txt', 'MODEL-4.txt' or 'MODEL-50.txt'), ``mode``
        ('phase' or 'normal'), ``alpha_std``, ``sigma_f``, ``lt``,
        ``lnu``, ``ls`` and ``out``. ``ncpu``, ``sky_model`` and ``out``
        may be overwritten in place.

    Raises
    ------
    ValueError
        For an unknown phase convention, sky-model name or gains mode.
    NotImplementedError
        If the FLAG column does not have exactly 4 correlations.
    AssertionError
        If the frequency axis length differs from the FLAG channel axis.
    """

    # Set thread count to cpu count
    if args.ncpu:
        from multiprocessing.pool import ThreadPool
        import dask
        dask.config.set(pool=ThreadPool(args.ncpu))
    else:
        import multiprocessing
        args.ncpu = multiprocessing.cpu_count()

    # Get full time column; only the time-bin indices are needed here
    ms = xds_from_table(args.ms)[0]
    _, tbin_idx, _ = chunkify_rows(ms.TIME, args.utimes_per_chunk)

    # Time axis
    n_time = tbin_idx.size

    # Get antenna columns
    ant1 = ms.ANTENNA1.data
    ant2 = ms.ANTENNA2.data

    # No. of antennas axis
    n_ant = (np.maximum(ant1.max(), ant2.max()) + 1).compute()

    # Get flag column
    flag = ms.FLAG.data

    # The UVW column is not used for gains, but an unknown convention is
    # still rejected so both entry points validate their arguments alike
    if args.phase_convention not in ('CASA', 'CODEX'):
        raise ValueError("Unknown sign convention for phase")

    # Get rest of dimensions
    n_row, n_freq, n_corr = flag.shape

    # Raise error if correlation axis too small
    if n_corr != 4:
        raise NotImplementedError("Only 4 correlations currently supported")

    # Get phase direction
    radec0_table = xds_from_table(args.ms+'::FIELD')[0]
    radec0 = radec0_table.PHASE_DIR.data.squeeze().compute()

    # Get frequency column
    freq_table = xds_from_table(args.ms+'::SPECTRAL_WINDOW')[0]
    freq = freq_table.CHAN_FREQ.data.astype(np.float64)[0]

    # Check dimension
    assert freq.size == n_freq

    # Check for sky-model
    if args.sky_model == 'MODEL-1.txt':
        args.sky_model = MODEL_1
    elif args.sky_model == 'MODEL-4.txt':
        args.sky_model = MODEL_4
    elif args.sky_model == 'MODEL-50.txt':
        args.sky_model = MODEL_50
    else:
        raise ValueError(f"Sky-model {args.sky_model} not in "
                         "kalcal/datasets/sky_model/")

    # Build source model from lsm
    lsm = Tigger.load(args.sky_model)

    # Direction axis
    n_dir = len(lsm.sources)

    # Create initial coordinate array
    lm = np.zeros((n_dir, 2), dtype=np.float64)

    # Cycle coordinates, converting each source position
    for d, source in enumerate(lsm.sources):
        radec_s = np.array([[source.pos.ra, source.pos.dec]])
        lm[d] = radec_to_lm(radec_s, radec0)

    # Dask to NP: hand the gain generators plain NumPy arrays, the same
    # way `both` does before calling them
    nu = freq.compute()

    # Generate gains
    print('==> Jones-only mode')
    if args.mode == "phase":
        gains = phase_gains(lm, nu, n_time, n_ant, args.alpha_std)
    elif args.mode == "normal":
        gains = normal_gains(tbin_idx, nu, lm, n_ant, n_corr,
                             args.sigma_f, args.lt, args.lnu, args.ls)
    else:
        raise ValueError("Only normal and phase modes available.")

    # Reduce jones to diagonals only
    gains = gains[:, :, :, :, (0, -1)]

    # Jones to complex
    gains = gains.astype(np.complex128)

    # Generate filename
    if args.out == "":
        args.out = f"{args.mode}.npy"

    # Save gains and settings to file
    with open(args.out, 'wb') as file:
        np.save(file, gains)

    print(f"==> Created Jones data: {args.out}")
def main(args):
    """Corrupt sky-model visibilities with stored gains and write them out.

    Builds a Stokes I, single-correlation model for every source in
    ``args.sky_model``, loads the gains from ``args.gain_file``, passes
    both to ``compute_and_corrupt_vis`` together with the antenna and UVW
    columns of ``args.ms``, and writes the result to column
    ``args.out_col`` of the same measurement set.

    Parameters
    ----------
    args : namespace
        Attributes read here: ``ms``, ``utimes_per_chunk``,
        ``sky_model``, ``gain_file`` and ``out_col``.
    """
    # get full time column and compute row chunks
    ms = table(args.ms)
    try:
        time = ms.getcol('TIME')
    finally:
        ms.close()
    row_chunks, tbin_idx, tbin_counts = chunkify_rows(
        time, args.utimes_per_chunk)

    # convert to dask arrays
    tbin_idx = da.from_array(tbin_idx, chunks=(args.utimes_per_chunk))
    tbin_counts = da.from_array(tbin_counts, chunks=(args.utimes_per_chunk))
    n_time = tbin_idx.size

    # get phase dir; the reshape insists on exactly one (ra, dec) pair
    fld = table(args.ms+'::FIELD')
    try:
        radec0 = fld.getcol('PHASE_DIR').squeeze().reshape(1, 2)[0]
    finally:
        fld.close()

    # get freqs, closing the sub-table like the other two
    spw = table(args.ms+'::SPECTRAL_WINDOW')
    try:
        freqs = spw.getcol('CHAN_FREQ')[0].astype(np.float64)
    finally:
        spw.close()
    n_freq = freqs.size

    # get source coordinates from lsm
    lsm = Tigger.load(args.sky_model)
    radec = []
    stokes = []
    spi = []
    ref_freqs = []
    for source in lsm.sources:
        radec.append([source.pos.ra, source.pos.dec])
        stokes.append([source.flux.I])
        # a source without a spectrum gets spectral index 0.0 and
        # reference frequency 1.0, i.e. a flat spectrum
        spectrum = source.spectrum
        spi.append(spectrum.spi if spectrum is not None else 0.0)
        ref_freqs.append(spectrum.freq0 if spectrum is not None else 1.0)

    n_dir = len(stokes)
    radec = np.asarray(radec)

    # the phase centre is the same for every time slot, so convert once
    # and broadcast along the time axis
    lm = np.zeros((n_time,) + radec.shape)
    lm[:] = radec_to_lm(radec, radec0)
    lm = da.from_array(lm, chunks=(args.utimes_per_chunk, n_dir, 2))

    # load in the model file
    n_corr = 1
    model = np.zeros((n_time, n_freq, n_dir, n_corr))
    stokes = np.asarray(stokes)
    ref_freqs = np.asarray(ref_freqs)
    spi = np.asarray(spi)
    for d in range(n_dir):
        # the spectrum does not depend on time: evaluate it once per
        # source on the NumPy frequencies and broadcast over all slots
        model[:, :, d, 0] = stokes[d] * (freqs/ref_freqs[d])**spi[d]

    # frequencies as a single-chunk dask array for the corruption step
    freqs = da.from_array(freqs, chunks=(n_freq))

    # columns needed to apply the gains
    cols = ['ANTENNA1', 'ANTENNA2', 'UVW']

    # load in gains
    jones = np.load(args.gain_file)
    jones = jones.astype(np.complex128)
    jones = da.from_array(jones,
                          chunks=(args.utimes_per_chunk,) + jones.shape[1:])

    # change model to dask array
    model = da.from_array(model,
                          chunks=(args.utimes_per_chunk,) + model.shape[1:])

    # load data in in chunks and apply gains to each chunk
    xds = xds_from_ms(args.ms, columns=cols, chunks={"row": row_chunks})[0]
    ant1 = xds.ANTENNA1.data
    ant2 = xds.ANTENNA2.data
    uvw = xds.UVW.data

    # apply gains
    data = compute_and_corrupt_vis(tbin_idx, tbin_counts, ant1, ant2,
                                   jones, model, uvw, freqs, lm)

    # Assign visibilities to args.out_col and write to ms
    xds = xds.assign(**{args.out_col: (("row", "chan", "corr"), data)})

    # Create a write to the table
    write = xds_to_table(xds, args.ms, [args.out_col])

    # Submit all graph computations in parallel
    with ProgressBar():
        write.compute()