def __init__(self, fname, k, basename='cv', seed=None):
    """
    Set up K-fold cross-validation over a UV-FITS data set.

    :param fname: Path to the UV-FITS file to split.
    :param k: Number of folds.
    :param basename: (optional) Prefix for the generated train/test files.
        (default: ``cv``)
    :param seed: (optional) Random seed used when shuffling samples.
        (default: ``None``)
    """
    # Remember the raw inputs first.
    self.fname = fname
    self.k = k
    self.seed = seed
    self.basename = basename
    # Load the visibility data once; the folds index into it later.
    self.uvdata = UVData(fname)
    # File names each train/test split will be written under.
    self.test_fname = "{}_test.FITS".format(basename)
    self.train_fname = "{}_train.FITS".format(basename)
    self.baseline_folds = None
    self.create_folds()
def learning_curve(uv_fits_path, fracs, K, initial_dfm_model_path=None, n_iter=100, mapsize_clean=(512, 0.1), path_to_script=None, n_splits=10, data_dir=None, ls_cv='-', ls_train='-', plot=False):
    """
    Build a learning curve: CV-score and train-score as functions of the
    fraction of the data used.

    :param uv_fits_path: Path to the UV-FITS file with the full data set.
    :param fracs: Iterable of fractions of the data to use.
    :param K: Number of folds for K-fold cross-validation.
    :param initial_dfm_model_path: (optional) Path to the initial difmap
        model passed through to ``KFoldCV.cv_score``. (default: ``None``)
    :param n_iter: (optional) Number of model-fit iterations. (default: 100)
    :param mapsize_clean: (optional) Map parameters (pixels, pixel size) for
        CLEAN. (default: ``(512, 0.1)``)
    :param path_to_script: (optional) Path to the difmap final-CLEAN script.
    :param n_splits: (optional) Number of random repetitions per fraction.
    :param data_dir: (optional) Directory for intermediate files.
    :param ls_cv: (optional) matplotlib line style for the CV curve.
    :param ls_train: (optional) matplotlib line style for the train curve.
    :param plot: (optional) If ``True`` plot both curves with error bars.
    :return: Two dicts (cv_means, train_means) mapping each fraction
        (including ``1.0`` for the full data) to a list of ``n_splits``
        mean scores.
    """
    uvdata = UVData(uv_fits_path)
    cv_means = dict()
    train_means = dict()
    for frac in fracs:
        cv_means[frac] = list()
        train_means[frac] = list()
        for i in range(n_splits):
            # Save a random fraction of the data, then K-fold CV it.
            uv_frac_path = os.path.join(data_dir, 'frac_{}.fits'.format(frac))
            uvdata.save_fraction(uv_frac_path, frac,
                                 random_state=np.random.randint(0, 1000))
            kfold = KFoldCV(uv_frac_path, K, seed=np.random.randint(0, 1000))
            kfold.create_train_test_data(outdir=data_dir)
            cv_scores, train_scores = kfold.cv_score(
                initial_dfm_model_path=initial_dfm_model_path,
                data_dir=data_dir, niter=n_iter,
                mapsize_clean=mapsize_clean,
                path_to_script=path_to_script)
            cv_means[frac].append(np.mean(cv_scores))
            train_means[frac].append(np.mean(train_scores))
    # CV-score for full data
    cv_means[1.0] = list()
    train_means[1.0] = list()
    for i in range(n_splits):
        kfold = KFoldCV(uv_fits_path, K, seed=np.random.randint(0, 1000))
        kfold.create_train_test_data(outdir=data_dir)
        cv_scores, train_scores = kfold.cv_score(
            initial_dfm_model_path=initial_dfm_model_path,
            data_dir=data_dir, niter=n_iter, mapsize_clean=mapsize_clean,
            path_to_script=path_to_script)
        cv_means[1.0].append(np.mean(cv_scores))
        train_means[1.0].append(np.mean(train_scores))
    if plot:
        # Error bars show the spread over the ``n_splits`` repetitions.
        fig, axes = plt.subplots()
        axes.errorbar(sorted(cv_means.keys()),
                      y=[np.mean(cv_means[frac])
                         for frac in sorted(cv_means.keys())],
                      yerr=[np.std(cv_means[frac])
                            for frac in sorted(cv_means.keys())],
                      label='CV', ls=ls_cv)
        axes.errorbar(sorted(train_means.keys()),
                      y=[np.mean(train_means[frac])
                         for frac in sorted(train_means.keys())],
                      yerr=[np.std(train_means[frac])
                            for frac in sorted(train_means.keys())],
                      label='Train', ls=ls_train)
        axes.legend()
        axes.set_xlabel("Frac. of training data")
        axes.set_ylabel("RMSE")
        fig.show()
    return cv_means, train_means
def run(self, modelcard=None, testcard=None, stokes='I'):
    """
    Method that cross-validates set of image-plane models obtained by
    modelling training samples on corresponding set of testing samples.

    :param modelcard:
        Wildcard of file names ~ 'model_0i_0jofN.txt', where model in
        'model_0i_0jofN.txt' file is from modelling ``0j``-th training
        sample ('train_0jofN.FITS') with ``0i``-th model.
    :param testcard:
        Wildcard of file names ~ 'test_0jofN.FITS'.
    :param stokes: (optional)
        Stokes parameter of the models. (default: ``I``)
    :return:
        List of lists [modelfilename, CV-score, sigma_cv_score].
    """
    modelfiles = glob.glob(modelcard)
    testfiles = glob.glob(testcard)
    modelfiles.sort()
    testfiles.sort()
    ntest = len(testfiles)
    # Each of the model families must pair with every testing sample once.
    assert not len(modelfiles) % ntest
    # Explicit floor division: plain ``/`` returns a float under Python 3
    # and would break ``range(nmodels)`` below.
    nmodels = len(modelfiles) // ntest
    print("modelfiles : " + str(modelfiles))
    print("testfiles : " + str(testfiles))
    result = list()
    for i in range(nmodels):
        # Models of family ``i`` occupy a contiguous slice of the sorted
        # file list, one model per testing sample.
        models = modelfiles[ntest * i:ntest * (i + 1)]
        print("Using models " + str(models) +
              " and testing sample " + str(testfiles))
        cv_scores = list()
        for j, testfile in enumerate(testfiles):
            # Pair the j-th model file with the j-th testing sample.
            model = Model()
            model.add_from_txt(models[j], stoke=stokes)
            print("Using test file " + str(testfile))
            data = UVData(testfile)
            cv_score = data.cv_score(model, stokes=stokes)
            print("cv_score for one testing sample is " + str(cv_score))
            cv_scores.append(cv_score)
        mean_cv_score = np.mean(cv_scores)
        std_cv_score = np.std(cv_scores)
        print(mean_cv_score, std_cv_score)
        result.append(["model#" + str(i + 1), mean_cv_score, std_cv_score])
    return result
class KFoldCV(object):
    """
    K-fold cross-validation of UV-data, split independently on each
    baseline so every fold keeps samples from all baselines.

    Iterating over an instance yields ``k`` pairs of file names
    (train UV-FITS, test UV-FITS), re-writing the same two files each time.
    """
    def __init__(self, fname, k, basename='cv', seed=None):
        # Path to the original UV-FITS file.
        self.fname = fname
        self.uvdata = UVData(fname)
        # Number of folds.
        self.k = k
        # Random seed for shuffling inside each baseline.
        self.seed = seed
        self.basename = basename
        # Output files that each fold's split is saved into.
        self.test_fname = "{}_test.FITS".format(basename)
        self.train_fname = "{}_train.FITS".format(basename)
        self.baseline_folds = None
        self.create_folds()

    def create_folds(self):
        """
        Build per-baseline K-fold splits as boolean index arrays over the
        full data, stored in ``self.baseline_folds``.
        """
        baseline_folds = dict()
        # ``_indxs_baselines`` maps baseline -> boolean mask over all rows.
        for bl, indxs in self.uvdata._indxs_baselines.items():
            print "Baseline {} has {} samples".format(bl,
                                                      np.count_nonzero(indxs))
            try:
                # NOTE(review): old ``sklearn.cross_validation.KFold``
                # signature (n_samples first, iterable of splits) -- confirm
                # the pinned scikit-learn version.
                kfold = KFold(np.count_nonzero(indxs), self.k, shuffle=True,
                              random_state=self.seed)
                baseline_folds[bl] = list()
                for train, test in kfold:
                    # Lift the positions within this baseline back to
                    # boolean masks over the whole data set.
                    tr = to_boolean_array(np.nonzero(indxs)[0][train],
                                          len(indxs))
                    te = to_boolean_array(np.nonzero(indxs)[0][test],
                                          len(indxs))
                    baseline_folds[bl].append((tr, te))
            # When ``k`` more then number of baseline samples
            except ValueError:
                pass
        self.baseline_folds = baseline_folds

    def __iter__(self):
        """
        Yield ``(train_fname, test_fname)`` for each of the ``k`` folds,
        saving the corresponding data subsets to those files first.
        """
        for i in xrange(self.k):
            # Start from all-False masks; ``logical_or`` below turns the
            # float zeros into a boolean array on the first OR.
            train_indxs = np.zeros(len(self.uvdata.hdu.data))
            test_indxs = np.zeros(len(self.uvdata.hdu.data))
            # Combine fold ``i`` of every baseline into one global mask.
            for bl, kfolds in self.baseline_folds.items():
                itrain, itest = kfolds[i]
                # itrain = to_boolean_array(itrain)
                train_indxs = np.logical_or(train_indxs, itrain)
                test_indxs = np.logical_or(test_indxs, itest)
            train_data = self.uvdata.hdu.data[train_indxs]
            test_data = self.uvdata.hdu.data[test_indxs]
            # Overwrite the same pair of files on every iteration.
            self.uvdata.save(self.test_fname, test_data, rewrite=True)
            self.uvdata.save(self.train_fname, train_data, rewrite=True)
            yield self.train_fname, self.test_fname
def create_bootstrap_sample(uvdata_dict, ccfits_dict, data_dir, n_boot=10):
    """
    Bootstrap the original UV-data of each band using its Stokes CC-models.

    :param uvdata_dict: Mapping band -> uv-data file name.
    :param ccfits_dict: Mapping band -> {stokes: CC-FITS file name}.
    :param data_dir: Directory holding the input files; the replications
        are written there as ``boot_band_i.uvf``.
    :param n_boot: (optional) Number of bootstrap replications per band.
        (default: ``10``)
    """
    print("Bootstrap uv-data with CLEAN-models...")
    for band, uv_fits in uvdata_dict.items():
        band_uvdata = UVData(os.path.join(data_dir, uv_fits))
        # One CLEAN model per Stokes parameter of this band.
        band_models = [create_model_from_fits_file(os.path.join(data_dir,
                                                                cc_fits))
                       for cc_fits in ccfits_dict[band].values()]
        bootstrap = CleanBootstrap(band_models, band_uvdata)
        # ``CleanBootstrap.run`` writes into the CWD, so hop into
        # ``data_dir`` for the duration of the run.
        previous_dir = os.getcwd()
        os.chdir(data_dir)
        bootstrap.run(n=n_boot, nonparametric=False, use_v=False,
                      use_kde=True,
                      outname=['boot_{}'.format(band), '.uvf'])
        os.chdir(previous_dir)
def __init__(self, original_uvfits, outdir):
    """
    Hold paths and loaded UV-data for a cross-validation run.

    :param original_uvfits: Path to the original UV-FITS file.
    :param outdir: Directory where generated files are placed.
    """
    self.original_uvfits = original_uvfits
    self.uvdata = UVData(original_uvfits)
    self.outdir = outdir
    # Fixed file names the train/test splits are saved under.
    self.train_uvfits = "cv_train.uvf"
    self.test_uvfits = "cv_test.uvf"
    # No baseline/scan selected yet.
    self.cur_bl = None
    self.cur_scan = None
def load_uvdata(self):
    """
    Load every file from ``self.original_fits_files`` and index both the
    loaded UV-data objects and the file paths by band center frequency.
    """
    self.uvdata_dict = {}
    self.uvfits_dict = {}
    for fits_file in self.original_fits_files:
        print("Loading UV-FITS file {}".format(os.path.split(fits_file)[-1]))
        loaded = UVData(fits_file)
        self.uvdata_dict[loaded.band_center] = loaded
        self.uvfits_dict[loaded.band_center] = fits_file
def bootstrap_uvfits_with_difmap_model(
        uv_fits_path, dfm_model_path, nonparametric=False, use_kde=False,
        use_v=False, n_boot=100, stokes='I', boot_dir=None, recenter=True,
        pairs=False, niter=100, bootstrapped_uv_fits=None,
        additional_noise=None, boot_mdl_outname_base="bootstrapped_model"):
    """
    Bootstrap a UV-FITS data set with a difmap model and re-fit the model
    to every bootstrap replication.

    :param uv_fits_path: Path to the UV-FITS file.
    :param dfm_model_path: Path to the fitted difmap text model.
    :param bootstrapped_uv_fits: (optional) Pre-existing replication files;
        if ``None`` the replications are generated here. (default: ``None``)
    :return: List of paths to the re-fitted difmap model files.

    Side effects: changes CWD to ``boot_dir`` while bootstrapping and
    deletes the bootstrapped UV-FITS files, difmap logs and command files
    when done.
    """
    dfm_model_dir, dfm_model_fname = os.path.split(dfm_model_path)
    comps = import_difmap_model(dfm_model_fname, dfm_model_dir)
    if boot_dir is None:
        boot_dir = os.getcwd()
    if bootstrapped_uv_fits is None:
        # Generate the replications ourselves.
        uvdata = UVData(uv_fits_path)
        sky_model = Model(stokes=stokes)
        sky_model.add_components(*comps)
        bootstrap = CleanBootstrap([sky_model], uvdata,
                                   additional_noise=additional_noise)
        os.chdir(boot_dir)
        bootstrap.run(nonparametric=nonparametric, use_kde=use_kde,
                      recenter=recenter, use_v=use_v, n=n_boot, pairs=pairs)
        bootstrapped_uv_fits = sorted(
            glob.glob(os.path.join(boot_dir, 'bootstrapped_data*.fits')))
    # Re-fit the original model to every replication.
    for j, bootstrapped_fits in enumerate(bootstrapped_uv_fits):
        modelfit_difmap(bootstrapped_fits, dfm_model_fname,
                        '{}_{}.mdl'.format(boot_mdl_outname_base, j),
                        path=boot_dir, mdl_path=dfm_model_dir,
                        out_path=boot_dir, niter=niter)
    booted_mdl_paths = glob.glob(
        os.path.join(boot_dir, '{}*'.format(boot_mdl_outname_base)))
    # Clean uv_fits
    leftovers = list(bootstrapped_uv_fits)
    for pattern in ("*.log*", "*commands*"):
        leftovers.extend(glob.glob(os.path.join(boot_dir, pattern)))
    for leftover in leftovers:
        os.unlink(leftover)
    return booted_mdl_paths
def score(uv_fits_path, mdl_path, stokes='I'):
    """
    Returns rms of model on given uv-data for stokes 'I'.

    :param uv_fits_path:
        Path to uv-fits file.
    :param mdl_path:
        Path to difmap model text file or FITS-file with CLEAN model.
    :param stokes: (optional)
        Stokes parameter string. ``I``, ``RR`` or ``LL`` currently
        supported. (default: ``I``)
    :return:
        Per-point rms between given data and model evaluated at given data
        points.
    """
    if stokes not in ('I', 'RR', 'LL'):
        raise Exception("Only stokes (I, RR, LL) supported!")
    # Two copies: one keeps the observed data, the other is substituted
    # with the model prediction so they can be subtracted.
    uvdata = UVData(uv_fits_path)
    uvdata_model = UVData(uv_fits_path)
    # Try to read a CLEAN-model FITS file first; fall back to a difmap
    # text model if that fails.
    try:
        model = create_model_from_fits_file(mdl_path)
    except IOError:
        dfm_mdl_dir, dfm_mdl_fname = os.path.split(mdl_path)
        comps = import_difmap_model(dfm_mdl_fname, dfm_mdl_dir)
        model = Model(stokes=stokes)
        model.add_components(*comps)
    uvdata_model.substitute([model])
    uvdata_diff = uvdata - uvdata_model
    # Index 0/1 of the last axis are the RR/LL correlations; Stokes I is
    # their average.
    if stokes == 'I':
        i_diff = 0.5 * (uvdata_diff.uvdata_weight_masked[..., 0] +
                        uvdata_diff.uvdata_weight_masked[..., 1])
    elif stokes == 'RR':
        i_diff = uvdata_diff.uvdata_weight_masked[..., 0]
    elif stokes == 'LL':
        i_diff = uvdata_diff.uvdata_weight_masked[..., 1]
    else:
        raise Exception("Only stokes (I, RR, LL) supported!")
    # 2 means that Re & Im are counted independently
    # NOTE(review): this counts nonzero residuals rather than unmasked
    # points -- exactly-zero residuals shrink the denominator; confirm
    # that is intended.
    factor = 2 * np.count_nonzero(i_diff)
    # factor = np.count_nonzero(~uvdata_diff.uvdata_weight_masked.mask[:, :, :2])
    # squared_diff = uvdata_diff.uvdata_weight_masked[:, :, :2] * \
    # uvdata_diff.uvdata_weight_masked[:, :, :2].conj()
    squared_diff = i_diff * i_diff.conj()
    return np.sqrt(float(np.sum(squared_diff)) / factor)
def __init__(self, uv_fits_path, k, basename='cv', seed=None, baselines=None, stokes='I'):
    """
    K-fold cross-validation splitter for a UV-FITS data set.

    :param uv_fits_path: Path to the UV-FITS file to split.
    :param k: Number of folds.
    :param basename: (optional) Prefix of train/test file names.
        (default: ``cv``)
    :param seed: (optional) Random seed for shuffling. (default: ``None``)
    :param baselines: (optional) Baselines passed to ``create_folds``.
        (default: ``None``)
    :param stokes: (optional) One of ``I``, ``RR`` or ``LL``.
        (default: ``I``)
    :raises Exception: If ``stokes`` is not one of the supported values.
    """
    # Reject unsupported Stokes parameters before touching any data.
    if stokes not in ('I', 'RR', 'LL'):
        raise Exception("Only stokes (I, RR, LL) supported!")
    self.stokes = stokes
    self.uv_fits_path = uv_fits_path
    self.k = k
    self.seed = seed
    self.basename = basename
    # Per-fold file names get fold indices appended to these bases.
    self.test_fname_base = "{}_test".format(basename)
    self.train_fname_base = "{}_train".format(basename)
    self.uvdata = UVData(uv_fits_path)
    self.baseline_folds = None
    self.create_folds(baselines)
def bootstrap_uv_fits(uv_fits_path, cc_fits_paths, n, outpath=None, outname=None):
    """
    Function that bootstraps uv-data in user-specified FITS-files and
    FITS-files with clean components.

    :param uv_fits_path:
        Path to fits file with self-calibrated uv-data.
    :param cc_fits_paths:
        Iterable of paths to files with CC models.
    :param n:
        Number of bootstrap realizations.
    :param outpath: (optional)
        Directory to save bootstrapped uv-data FITS-files. If ``None``
        then use CWD. (default: ``None``)
    :param outname: (optional)
        How to name bootstrapped uv-data FITS-files. If ``None`` then
        use default for ``Bootstap.run`` method. (default: ``None``)
    """
    uvdata = UVData(uv_fits_path)
    # One CLEAN model per CC-FITS file.
    models = list()
    for cc_fits_path in cc_fits_paths:
        ccmodel = create_model_from_fits_file(cc_fits_path)
        models.append(ccmodel)
    boot = CleanBootstrap(models, uvdata)
    if outpath is not None:
        if not os.path.exists(outpath):
            os.makedirs(outpath)
    curdir = os.getcwd()
    # NOTE(review): if ``outpath`` is None this ``os.chdir`` call fails,
    # contradicting the docstring's "use CWD" promise -- confirm callers
    # always pass ``outpath``.
    os.chdir(outpath)
    boot.run(n=n, outname=outname, nonparametric=False, use_v=False,
             use_kde=True)
    os.chdir(curdir)
use_V=use_V, use_weights=use_weights) self.lnpr = LnPrior(model) def __call__(self, p): lnpr = self.lnpr(p[:]) if not np.isfinite(lnpr): return -np.inf return self.lnlik(p[:]) + lnpr if __name__ == '__main__': from spydiff import import_difmap_model from uv_data import UVData from model import Model, Jitter uv_fits = '/home/ilya/code/vlbi_errors/pet/0235+164_X.uvf_difmap' uvdata = UVData(uv_fits) # Create model mdl = Model(stokes='RR') comps = import_difmap_model('0235+164_X.mdl', '/home/ilya/code/vlbi_errors/pet') comps[0].add_prior(flux=(sp.stats.uniform.logpdf, [0., 10], dict(),), bmaj=(sp.stats.uniform.logpdf, [0, 1], dict(),), e=(sp.stats.uniform.logpdf, [0, 1.], dict(),), bpa=(sp.stats.uniform.logpdf, [0, np.pi], dict(),)) comps[1].add_prior(flux=(sp.stats.uniform.logpdf, [0., 3], dict(),), bmaj=(sp.stats.uniform.logpdf, [0, 5], dict(),)) mdl.add_components(*comps) # Create log of likelihood function lnlik = LnLikelihood(uvdata, mdl) lnpr = LnPrior(mdl)
import os
import sys
sys.path.insert(0, '/home/ilya/github/vlbi_errors/vlbi_errors')
import numpy as np
from skimage.transform import rotate
import matplotlib.pyplot as plt
from uv_data import UVData
from components import ImageComponent
from model import Model

# Milliarcseconds -> radians conversion factor.
mas_to_rad = 4.8481368 * 1E-09
# uv_file = '/home/ilya/github/bck/jetshow/uvf/0716+714_raks01xg_C_LL_0060s_uva.fits'
uv_file = '/home/ilya/github/bck/jetshow/uvf/2200+420_K_SVLBI.uvf'
uvdata = UVData(uv_file)
# Plot the observed uv-coverage/visibilities.
# fig = uvdata.uvplot(stokes=["LL"])
fig = uvdata.uvplot()
images = list()
# Position angles in degrees; presumably used to rotate the model image
# below (the ``rotate`` import suggests so) -- TODO confirm.
angles = range(0, 180, 30)
# Load a model brightness-distribution image from a plain-text dump.
# image = '/home/ilya/github/bck/jetshow/uvf/map_i_09_C.txt'
image = '/home/ilya/github/bck/jetshow/cmake-build-debug/map_i.txt'
image = np.loadtxt(image)
images.append(image)
# Image grid parameters: size in pixels and pixel scale in mas.
# imsize = 1096
imsize = 1734
imsize = (imsize, imsize)
# mas_in_pix = 0.005
mas_in_pix = 0.00253
return -np.inf return self.lnlik(p[:]) + lnpr if __name__ == '__main__': # Test LS_estimates import sys from components import CGComponent, EGComponent from uv_data import UVData from model import Model try: from scipy.optimize import minimize, fmin except ImportError: sys.exit("install scipy for ml estimation") uv_fname = '/home/ilya/vlbi_errors/examples/L/1633+382/1633+382.l18.2010_05_21.uvf' uvdata = UVData(uv_fname) # Create model cg1 = EGComponent(1.0, -0.8, 0.2, .7, 0.5, 0) cg2 = CGComponent(0.8, 2.0, -.3, 2.3) cg3 = CGComponent(0.2, 5.0, .0, 2.) mdl = Model(stokes='I') mdl.add_components(cg1, cg2, cg3) # Create log of likelihood function lnlik = LnLikelihood(uvdata, mdl, average_freq=True, amp_only=False) # Nelder-Mead simplex algorithm p_ml = fmin(lambda p: -lnlik(p), mdl.p) # Various methods of minimization (some require jacobians) # TODO: Implement analitical grad of likelihood (it's gaussian) fit = minimize(lambda p: -lnlik(p), mdl.p, method='L-BFGS-B',
def coverage_of_model(original_uv_fits, original_mdl_file, outdir=None, n_cov=100, n_boot=300, mapsize=(1024, 0.1), path_to_script=None): """ Conduct coverage analysis of uv-data & model :param original_uv_fits: Self-calibrated uv-fits file. :param original_mdl_file: Difmap txt-file with model. :param outdir: Output directory to store results. :param n_cov: Number of samples to create. """ # Create sample of 100 uv-fits data & models sample_uv_fits_paths, sample_model_paths = create_sample(original_uv_fits, original_mdl_file, outdir=outdir, n_sample=n_cov) # For each sample uv-fits & model find 1) conventional errors & 2) bootstrap # errors for j, (sample_uv_fits_path, sample_mdl_path) in enumerate( zip(sample_uv_fits_paths, sample_model_paths)): sample_uv_fits, dir = os.path.split(sample_uv_fits_path) sample_mdl_file, dir = os.path.split(sample_mdl_path) try: comps = import_difmap_model(sample_mdl_file, dir) except ValueError: print('Problem import difmap model') model = Model(stokes='I') model.add_components(*comps) # Find errors by using Fomalont way # 1. Clean uv-data clean_difmap(sample_uv_fits, 'sample_cc_{}.fits'.format(j), 'I', mapsize, path=dir, path_to_script=path_to_script, outpath=dir) # 2. Get beam ccimage = create_clean_image_from_fits_file( os.path.join(dir, 'sample_cc_{}.fits'.format(j))) beam = ccimage.beam_image # 2. Subtract components convolved with beam ccimage.substract_model(model) # Find errors by using Lee way # a) fit uv-data and find model # b) CLEAN uv-data # c) substract model from CLEAN image # d) find errors pass # Find errors by using bootstrap # FT model to uv-plane uvdata = UVData(sample_uv_fits_path) try: boot = CleanBootstrap([model], uvdata) # If uv-data contains only one Stokes parameter (e.g. 
`0838+133`) except IndexError: print('Problem bootstrapping') curdir = os.getcwd() os.chdir(dir) boot.run(n=n_boot, nonparametric=True, outname=[outname, '.fits']) os.chdir(curdir) booted_uv_paths = sorted( glob.glob(os.path.join(data_dir, outname + "*"))) # Modelfit bootstrapped uvdata for booted_uv_path in booted_uv_paths: path, booted_uv_file = os.path.split(booted_uv_path) i = booted_uv_file.split('_')[-1].split('.')[0] modelfit_difmap(booted_uv_file, dfm_model_fname, dfm_model_fname + '_' + i, path=path, mdl_path=data_dir, out_path=data_dir) # Get params of initial model used for bootstrap comps = import_difmap_model(dfm_model_fname, data_dir) comps_params0 = {i: [] for i in range(len(comps))} for i, comp in enumerate(comps): comps_params0[i].extend(list(comp.p)) # Load bootstrap models booted_mdl_paths = glob.glob( os.path.join(data_dir, dfm_model_fname + "_*")) comps_params = {i: [] for i in range(len(comps))} for booted_mdl_path in booted_mdl_paths: path, booted_mdl_file = os.path.split(booted_mdl_path) comps = import_difmap_model(booted_mdl_file, path) for i, comp in enumerate(comps): comps_params[i].extend(list(comp.p)) # Print 65-% intervals (1 sigma) for i, comp in enumerate(comps): errors_fname = '68_{}_{}_comp{}.txt'.format(source, last_epoch, i) fn = open(os.path.join(data_dir, errors_fname), 'w') print "Component #{}".format(i + 1) for j in range(len(comp)): low, high, mean, median = hdi_of_mcmc(np.array( comps_params[i]).reshape((n_boot, len(comp))).T[j], cred_mass=0.68, return_mean_median=True) fn.write("{} {} {} {} {}".format(comp.p[j], low, high, mean, median)) fn.write("\n") fn.close() # For source in sources with component close to core # 1. Find residuals or estimate noise # 2. N times add resampled residuals (or just gaussian noise) to model and # create N new datasets # 3. Fit them using difmap. # 4. Find errors using Fomalont, Yee and using bootstrap. Check coverage. 
base_dir = '/home/ilya/vlbi_errors/model_cov' n_boot = 300 outname = 'boot_uv' names = [ 'source', 'id', 'trash', 'epoch', 'flux', 'r', 'pa', 'bmaj', 'e', 'bpa' ] df = pd.read_table(os.path.join(base_dir, 'asu.tsv'), sep=';', header=None, names=names, dtype={key: str for key in names}, index_col=False) # Mow for all sources get the latest epoch and create directory for analysis for source in df['source'].unique(): epochs = df.loc[df['source'] == source]['epoch'] last_epoch_ = list(epochs)[-1] last_epoch = last_epoch_.replace('-', '_') data_dir = os.path.join(base_dir, source, last_epoch) if not os.path.exists(data_dir): os.makedirs(data_dir) try: download_mojave_uv_fits(source, epochs=[last_epoch], bands=['u'], download_dir=data_dir) except: open( 'problem_download_from_mojave_{}_{}'.format( source, last_epoch), 'a').close() continue uv_fits_fname = mojave_uv_fits_fname(source, 'u', last_epoch) # Create instance of Model and bootstrap uv-data dfm_model_fname = 'dfmp_original_model.mdl' fn = open(os.path.join(data_dir, dfm_model_fname), 'w') model_df = df.loc[np.logical_and(df['source'] == source, df['epoch'] == last_epoch_)] for (flux, r, pa, bmaj, e, bpa) in np.asarray( model_df[['flux', 'r', 'pa', 'bmaj', 'e', 'bpa']]): print flux, r, pa, bmaj, e, bpa if not r.strip(' '): r = '0.0' if not pa.strip(' '): pa = '0.0' if not bmaj.strip(' '): bmaj = '0.0' if not e.strip(' '): e = "1.0" if np.isnan(float(bpa)): bpa = "0.0" else: bpa = bpa + 'v' if bmaj == '0.0': type_ = 0 bpa = "0.0" else: bmaj = bmaj + 'v' type_ = 1 fn.write("{}v {}v {}v {} {} {} {} {} {}".format( flux, r, pa, bmaj, e, bpa, type_, "0", "0\n")) fn.close()
import os
from uv_data import UVData
from model import Model
from spydiff import import_difmap_model
from bootstrap import CleanBootstrap

# Smoke-test script: load a data set, attach its difmap model and overplot
# the bootstrap's model prediction (red) on the observed visibilities.
data_dir = '/home/ilya/code/vlbi_errors/tests/ft'
uv_fits = '1308+326.U1.2009_08_28.UV_CAL'
uvdata = UVData(os.path.join(data_dir, uv_fits))
model = Model(stokes='I')
comps = import_difmap_model('1308+326.U1.2009_08_28.mdl', data_dir)
model.add_components(*comps)
boot = CleanBootstrap([model], uvdata)
fig = boot.data.uvplot()
boot.model_data.uvplot(fig=fig, color='r')
# boot.find_outliers_in_residuals()
# boot.find_residuals_centers(split_scans=False)
# boot.fit_residuals_kde(split_scans=False, combine_scans=False,
# recenter=True)
def score(uv_fits_path, mdl_path, stokes='I', bmaj=None, score="l2", use_weights=True):
    """
    Returns rms of the trained model (CLEAN or difmap) on a given test
    UVFITS data set.

    :param uv_fits_path:
        Path to uv-fits file (test data).
    :param mdl_path:
        Path to difmap model text file or FITS-file with CLEAN model
        (trained model).
    :param stokes: (optional)
        Stokes parameter string. ``I``, ``RR`` or ``LL`` currently
        supported. (default: ``I``)
    :param bmaj: (optional)
        FWHM of the circular beam to account for. If ``None`` than do not
        account for the beam. (default: ``None``)
    :param score: (optional)
        Norm to use: ``l2`` (rms) or ``l1`` (mean absolute residual).
        NOTE(review): this parameter shadows the function name inside the
        body. (default: ``l2``)
    :param use_weights: (optional)
        Weight residuals by the (normalized) visibility weights instead of
        dividing by the point count? (default: ``True``)
    :return:
        Per-point rms between given test data and trained model evaluated
        at a given test data points.
    """
    stokes = stokes.upper()
    if stokes not in ('I', 'RR', 'LL'):
        raise Exception("Only stokes I, RR or LL are supported!")
    # Gaussian taper constant for the optional beam correction.
    if bmaj is not None:
        c = (np.pi * bmaj * mas_to_rad)**2 / (4 * np.log(2))
    else:
        c = 1.0
    # Loading test data with its own big mask
    uvdata = UVData(uv_fits_path)
    uvdata_model = UVData(uv_fits_path)
    # Loading trained model
    # CC-model
    try:
        model = create_model_from_fits_file(mdl_path)
    # Difmap model
    except IOError:
        dfm_mdl_dir, dfm_mdl_fname = os.path.split(mdl_path)
        comps = import_difmap_model(dfm_mdl_fname, dfm_mdl_dir)
        model = Model(stokes=stokes)
        model.add_components(*comps)
    # Computing difference and score
    uvdata_model.substitute([model])
    uvdata_diff = uvdata - uvdata_model
    # Index 0/1 of the last axis are the RR/LL correlations; Stokes I is
    # their average.
    if stokes == 'I':
        i_diff = 0.5 * (uvdata_diff.uvdata_weight_masked[..., 0] +
                        uvdata_diff.uvdata_weight_masked[..., 1])
        weights = uvdata.weights_nw_masked[..., 0] + \
            uvdata.weights_nw_masked[..., 1]
    elif stokes == 'RR':
        i_diff = uvdata_diff.uvdata_weight_masked[..., 0]
        weights = uvdata.weights_nw_masked[..., 0]
    elif stokes == 'LL':
        i_diff = uvdata_diff.uvdata_weight_masked[..., 1]
        weights = uvdata.weights_nw_masked[..., 1]
    else:
        raise Exception("Only stokes (I, RR, LL) supported!")
    # Normalize weights
    weights = weights / np.ma.sum(weights)
    # Account for beam
    if bmaj is not None:
        u = uvdata_diff.uv[:, 0]
        v = uvdata_diff.uv[:, 1]
        taper = np.exp(-c * (u * u + v * v))
        i_diff = i_diff * taper[:, np.newaxis]
    # Number of unmasked visibilities (accounting each IF)
    if stokes == "I":
        # 2 means that Re & Im are counted independently
        factor = 2 * np.count_nonzero(~i_diff.mask)
    else:
        factor = np.count_nonzero(~i_diff.mask)
    print("Number of independent test data points = ", factor)
    if score == "l2":
        if use_weights:
            result = np.sqrt((np.ma.sum(i_diff * i_diff.conj() *
                                        weights)).real)
        else:
            result = np.sqrt((np.ma.sum(i_diff * i_diff.conj())).real /
                             factor)
    elif score == "l1":
        if use_weights:
            result = (np.ma.sum(np.abs(i_diff) * weights)).real
        else:
            result = (np.ma.sum(np.abs(i_diff))).real / factor
    else:
        raise Exception("score must be in (l1, l2)!")
    return result
epoch = source_dict[source][0][5] source_dir = os.path.join(base_dir, source, epoch) if not os.path.exists(source_dir): os.makedirs(source_dir) get_mojave_mdl_file(os.path.join(base_dir, 'asu.tsv'), source, epoch, outdir=source_dir) epoch_ = "{}_{}_{}".format(*epoch.split('-')) download_mojave_uv_fits(source, epochs=[epoch_], download_dir=source_dir, bands=['u']) fname = mojave_uv_fits_fname(source, 'u', epoch_) uvdata = UVData(os.path.join(source_dir, fname)) print(uvdata.stokes) if 'RR' not in uvdata.stokes or 'LL' not in uvdata.stokes: continue # Refit difmap model modelfit_difmap(fname, "{}_{}.mdl".format(source, epoch), "{}_{}.mdl".format(source, epoch), niter=200, path=source_dir, mdl_path=source_dir, out_path=source_dir, show_difmap_output=True) # Create sample of 100 artificial data sets
# epoch_ = '2007-07-03' source = '0336-019' epoch = '2010_10_25' epoch_ = '2010-10-25' data_dir = '/home/ilya/github/vlbi_errors/examples/LC' data_dir = os.path.join(data_dir, source, epoch) if not os.path.exists(data_dir): os.makedirs(data_dir) # download_mojave_uv_fits(source, [epoch], download_dir=data_dir) path_to_script = '/home/ilya/github/vlbi_errors/difmap/final_clean_nw' uv_fits_fname = mojave_uv_fits_fname(source, 'u', epoch) uv_fits_path = os.path.join(data_dir, uv_fits_fname) # get_mojave_mdl_file('/home/ilya/Dropbox/papers/boot/new_pics/mojave_mod_first/asu.tsv', # source, epoch_, outfile='initial.mdl', outdir=data_dir) uvdata = UVData(uv_fits_path) # modelfit_difmap(uv_fits_fname, 'initial.mdl', # 'initial.mdl', niter=300, # path=data_dir, mdl_path=data_dir, # out_path=data_dir) original_model_path = os.path.join(data_dir, 'initial.mdl') from spydiff import import_difmap_model, clean_difmap comps = import_difmap_model(original_model_path) from automodel import plot_clean_image_and_components path_to_script = '/home/ilya/github/vlbi_errors/difmap/final_clean_nw' # clean_difmap(uv_fits_path, os.path.join(data_dir, 'cc.fits'), 'I', # (1024, 0.1), path=data_dir, path_to_script=path_to_script, # outpath=data_dir) from from_fits import create_clean_image_from_fits_file ccimage = create_clean_image_from_fits_file(
data_dir = '/home/ilya/Dropbox/papers/boot/bias/new/stationary' # download_mojave_uv_fits(source, epochs=[epoch], bands=['u'], # download_dir=data_dir) uv_fits_fnames = { freq: mojave_uv_fits_fname(source, freq, epoch) for freq in ('x', 'j', 'u') } for freq, uv_fits_fname in uv_fits_fnames.items(): uv_fits_path = os.path.join(data_dir, uv_fits_fname) cg1 = CGComponent(2.0, 0., 0., 0.2) cg2 = CGComponent(1.0, 0., 0.3, 0.3) cg3 = CGComponent(0.5, 0., 1.5, 0.4) mdl = Model(stokes='I') mdl.add_components(cg1, cg2, cg3) uvdata = UVData(uv_fits_path) noise = uvdata.noise() for i in range(1, 101): uvdata = UVData(uv_fits_path) uvdata.substitute([mdl]) uvdata.noise_add(noise) art_fits_fname = 'art_{}_{}.fits'.format(freq, i) art_fits_path = os.path.join(data_dir, art_fits_fname) uvdata.save(art_fits_path) # Here we should MCMC posterior modelfit_difmap(art_fits_fname, 'initial.mdl', 'out_{}_{}.mdl'.format(freq, i), niter=100, path=data_dir,
def create_coverage_map(original_uv_fits_path, ci_type, original_cc_fits_path=None, imsize=None, outdir=None, n_boot=200, path_to_script=None, alpha=0.68, n_cov=100, n_rms=1., stokes='I', boot_cc_fits_paths=None, sample_cc_fits_paths=None):
    """
    Conduct coverage analysis of image pixels flux CI. Find number of times
    when CI of `observed` value contains values of `samples`.

    :param original_uv_fits_path:
        Path to original FITS-file with uv-data.
    :param ci_type:
        Type of CI to test. ``boot`` or ``rms``. If ``boot`` then use
        residuals bootstrap CI. If ``rms`` then use Hovatta corrected
        image rms CI.
    :param original_cc_fits_path: (optional)
        Path to original FITS-file with CC model. If ``None`` then use
        ``imsize`` parameter to get `original` CC model from
        ``original_uv_fits_path``. (default: ``None``)
    :param imsize: (optional)
        Image parameters (image size [pix], pixel size [mas]) to use when
        doing first CC with ``original_cc_fits_path = None``.
        (default: ``None``)
    :param outdir: (optional)
        Directory to store intermediate results. If ``None`` then use CWD.
        (default: ``None``)
    :param n_boot: (optional)
        Number of bootstrap replications to use when calculating bootstrap
        CI for ``ci_type = boot`` option when ``boot_cc_fits_paths`` hasn't
        specified. (default: ``200``)
    :param path_to_script: (optional)
        Path to Dan Homan's script for final clean. If ``None`` then use
        CWD. (default: ``None``)
    :param alpha: (optional)
        Level of significance when calculating bootstrap CI for
        ``ci_type = boot`` case. E.g. ``0.68`` corresponds to `1 \sigma`.
        (default: ``0.68``)
    :param n_cov: (optional)
        Number of `samples` from infinite population to consider in
        coverage analysis of intervals. Here `samples` - observations of
        known source with different realisations of noise with known
        parameters. (default: ``100``)
    :param n_rms: (optional)
        Number of rms to use in ``ci_type = rms`` case. (default: ``1.``)
    :param stokes: (optional)
        Stokes parameter to use. If ``None`` then use ``I``.
        (default: ``None``)
    :param boot_cc_fits_paths: (optional)
        If ``ci_type = boot`` then this parameter could specify paths to
        cleaned bootstrapped uv-data.
    :param sample_cc_fits_paths: (optional)
        Path to FITS-files with CLEAN models of `sample` uv-data. If
        ``None`` then create ``n_cov`` `sample` uv-data from noise of
        `original` uv-data and `original` CLEAN model. (default: ``None``)
    :return:
        Coverage map. Each pixel contain frequency of times when samples
        from population hit inside CI for given pixel.
    """
    # If not given `original` CLEAN model - get it by cleaning `original`
    # uv-data
    if original_cc_fits_path is None:
        print("No `original` CLEAN model specified! Will CLEAN `original`"
              " uv-data.")
        if imsize is None:
            raise Exception("Specify ``imsize``")
        uv_fits_dir, uv_fits_fname = os.path.split(original_uv_fits_path)
        print("Cleaning `original` uv-data to"
              " {}".format(os.path.join(outdir, 'cc.fits')))
        clean_difmap(uv_fits_fname, 'cc.fits', stokes, imsize,
                     path=uv_fits_dir, path_to_script=path_to_script,
                     outpath=outdir)
        original_cc_fits_path = os.path.join(outdir, 'cc.fits')
    original_uv_data = UVData(original_uv_fits_path)
    # Noise estimate of the original data, re-used for every sample below.
    noise = original_uv_data.noise()
    original_model = create_model_from_fits_file(original_cc_fits_path)
    # Find images parameters for cleaning if necessary
    if imsize is None:
        print("Getting image parameters from `original`"
              " CLEAN FITS file {}.".format(original_cc_fits_path))
        image_params = get_fits_image_info(original_cc_fits_path)
        imsize = (image_params['imsize'][0],
                  abs(image_params['pixsize'][0]) / mas_to_rad)
    # Substitute uv-data with original model and create `model` uv-data
    print("Substituting original uv-data with CLEAN model...")
    model_uv_data = copy.deepcopy(original_uv_data)
    model_uv_data.substitute([original_model])
    # Add noise to `model` uv-data to get `observed` uv-data
    observed_uv_data = copy.deepcopy(model_uv_data)
    observed_uv_data.noise_add(noise)
    observed_uv_fits_path = os.path.join(outdir, 'observed_uv.uvf')
    if os.path.isfile(observed_uv_fits_path):
        os.unlink(observed_uv_fits_path)
    print("Adding noise to `model` uv-data to get `observed` uv-data...")
    observed_uv_data.save(fname=observed_uv_fits_path)
    observed_cc_fits_path = os.path.join(outdir, 'observed_cc.fits')
    if os.path.isfile(observed_cc_fits_path):
        os.unlink(observed_cc_fits_path)
    # Clean `observed` uv-data to get `observed` image and model
    print("Cleaning `observed` uv-data to `observed` CLEAN model...")
    clean_difmap('observed_uv.uvf', 'observed_cc.fits',
                 original_model.stokes, imsize, path=outdir,
                 path_to_script=path_to_script, outpath=outdir)
    # Get `observed` model and image
    observed_model = create_model_from_fits_file(observed_cc_fits_path)
    observed_image = create_image_from_fits_file(observed_cc_fits_path)
    # Testing coverage of bootstrapped CI
    if ci_type == 'boot':
        # Bootstrap and clean only when necessary
        if boot_cc_fits_paths is None:
            # Bootstrap `observed` uv-data with `observed` model
            boot = CleanBootstrap([observed_model], observed_uv_data)
            cwd = os.getcwd()
            path_to_script = path_to_script or cwd
            os.chdir(outdir)
            print("Bootstrapping uv-data with {} replications".format(
                n_boot))
            boot.run(outname=['observed_uv_boot', '.uvf'], n=n_boot)
            os.chdir(cwd)
            boot_uv_fits_paths = sorted(
                glob.glob(os.path.join(outdir, 'observed_uv_boot*.uvf')))
            # Clean each bootstrapped uv-data
            for i, uv_fits_path in enumerate(boot_uv_fits_paths):
                uv_fits_dir, uv_fits_fname = os.path.split(uv_fits_path)
                print("Cleaning {} bootstrapped observed"
                      " uv-data to {}".format(
                          uv_fits_path,
                          os.path.join(
                              outdir,
                              'observed_cc_boot_{}.fits'.format(i + 1))))
                clean_difmap(uv_fits_fname,
                             'observed_cc_boot_{}.fits'.format(i + 1),
                             original_model.stokes, imsize,
                             path=uv_fits_dir,
                             path_to_script=path_to_script, outpath=outdir)
            boot_cc_fits_paths = glob.glob(
                os.path.join(outdir, 'observed_cc_*.fits'))
        # Calculate bootstrap CI
        # hdi_low, hdi_high = boot_ci_bc(boot_cc_fits_paths,
        # observed_cc_fits_path, alpha=alpha)
        hdi_low, hdi_high = boot_ci(boot_cc_fits_paths,
                                    observed_cc_fits_path, alpha=alpha)
    elif ci_type == 'rms':
        # Calculate ``n_rms`` CI
        rms = observed_image.rms(region=(50, 50, 50, None))
        rms = np.sqrt(rms**2. + (1.5 * rms**2.)**2.)
        hdi_low = observed_image.image - rms
        hdi_high = observed_image.image + rms
    else:
        raise Exception("CI intervals must be `boot` or `rms`!")
    # Create `sample` uv-data and clean it only when necessary
    if sample_cc_fits_paths is None:
        # Add noise to `model` uv-data ``n_cov`` times and get ``n_cov``
        # `samples` from population
        sample_uv_fits_paths = list()
        for i in range(n_cov):
            sample_uv_data = copy.deepcopy(model_uv_data)
            sample_uv_data.noise_add(noise)
            # NOTE(review): 'samle' looks like a typo for 'sample' in the
            # file name -- harmless only as long as nothing else globs for
            # 'sample_uv_*'; confirm.
            sample_uv_fits_path = os.path.join(
                outdir, 'samle_uv_{}.uvf'.format(i + 1))
            sample_uv_data.save(sample_uv_fits_path)
            sample_uv_fits_paths.append(sample_uv_fits_path)
        # Clean each `sample` FITS-file
        for i, uv_fits_path in enumerate(sample_uv_fits_paths):
            uv_fits_dir, uv_fits_fname = os.path.split(uv_fits_path)
            print("Cleaning {} sample uv-data to"
                  " {}".format(
                      uv_fits_path,
                      os.path.join(outdir,
                                   'sample_cc_{}.fits'.format(i + 1))))
            clean_difmap(uv_fits_fname, 'sample_cc_{}.fits'.format(i + 1),
                         original_model.stokes, imsize, path=uv_fits_dir,
                         path_to_script=path_to_script, outpath=outdir)
        sample_cc_fits_paths = glob.glob(
            os.path.join(outdir, 'sample_cc_*.fits'))
    sample_images = list()
    for sample_cc_fits_path in sample_cc_fits_paths:
        image = create_image_from_fits_file(sample_cc_fits_path)
        sample_images.append(image.image)
    # For each pixel check how often flux in `sample` images lies in CI
    # derived for observed image.
    cov_array = np.zeros((imsize[0], imsize[0]), dtype=float)
    print("calculating CI intervals")
    for (x, y), value in np.ndenumerate(cov_array):
        for image in sample_images:
            cov_array[x, y] += float(
                np.logical_and(hdi_low[x, y] < image[x, y],
                               image[x, y] < hdi_high[x, y]))
    # Frequency of CI hits per pixel.
    return cov_array / n_cov
clean_difmap(uv_fits_x, 'x_cc.fits', 'I', (1024, 0.1), path=uvdata_dir, path_to_script=path_to_script, show_difmap_output=True, outpath=data_dir) # Clean original uv-data with common beam clean_difmap(uv_fits_x, 'x_cc_same.fits', 'I', (1024, 0.1), path=uvdata_dir, path_to_script=path_to_script, show_difmap_output=True, outpath=data_dir) ccimage_x = create_clean_image_from_fits_file(os.path.join(data_dir, 'x_cc.fits')) clean_difmap(uv_fits_u, 'u_cc_same.fits', 'I', (1024, 0.1), path=uvdata_dir, path_to_script=path_to_script, show_difmap_output=True, outpath=data_dir, beam_restore=ccimage_x.beam) u_model = create_model_from_fits_file(os.path.join(data_dir, 'u_cc.fits')) x_model = create_model_from_fits_file(os.path.join(data_dir, 'x_cc.fits')) u_uvdata = UVData(os.path.join(uvdata_dir, uv_fits_u)) x_uvdata = UVData(os.path.join(uvdata_dir, uv_fits_x)) # Bootstrap uv-data with original CLEAN models xboot = CleanBootstrap([x_model], x_uvdata) xboot.run(100, nonparametric=True, use_v=False, outname=['boot_x', '.fits']) uboot = CleanBootstrap([u_model], u_uvdata) uboot.run(100, nonparametric=True, use_v=False, outname=['boot_u', '.fits']) # Clean bootstrapped uv-data with common parameters x_boot_uvfits = sorted(glob.glob('boot_x_*.fits')) u_boot_uvfits = sorted(glob.glob('boot_u_*.fits')) for i, x_boot_uv in enumerate(x_boot_uvfits): clean_difmap(x_boot_uv, 'x_cc_same_{}.fits'.format(str(i+1).zfill(3)), 'I', (1024, 0.1), path_to_script=path_to_script, show_difmap_output=True,
def create_sample(original_uv_fits_path, original_cc_fits_path=None,
                  imsize=None, outdir=None, path_to_script=None, n_sample=100,
                  stokes='I'):
    """
    Create `sample` from `true` or `model` source

    :param original_uv_fits_path:
        Path to original FITS-file with uv-data.
    :param original_cc_fits_path: (optional)
        Path to original FITS-file with CC model. If ``None`` then use
        ``imsize`` parameter to get `original` CC model from
        ``original_uv_fits_path``. (default: ``None``)
    :param imsize: (optional)
        Image parameters (image size [pix], pixel size [mas]) to use
        when doing first CC with ``original_cc_fits_path = None``. (default:
        ``None``)
    :param outdir: (optional)
        Directory to store intermediate results. If ``None`` then use CWD.
        (default: ``None``)
    :param path_to_script: (optional)
        Path to Dan Homan's script for final clean. If ``None`` then use CWD.
        (default: ``None``)
    :param n_sample: (optional)
        Number of `samples` from infinite population to consider in coverage
        analysis of intervals. Here `samples` - observations of known source
        with different realisations of noise with known parameters. (default:
        ``100``)
    :param stokes: (optional)
        Stokes parameter to use. (default: ``'I'``)

    :return:
        Creates FITS-files with uv-data and CLEAN models of `sample`. Returns
        two sorted lists of paths: (`sample` uv-FITS files, `sample` CC-FITS
        files).
    """
    # If not given `original` CLEAN model - get it by cleaning `original`
    # uv-data
    if original_cc_fits_path is None:
        print("No `original` CLEAN model specified! Will CLEAN `original`"
              " uv-data.")
        if imsize is None:
            raise Exception("Specify ``imsize``")
        uv_fits_dir, uv_fits_fname = os.path.split(original_uv_fits_path)
        original_cc_fits_path = os.path.join(outdir, 'original_cc.fits')
        print("Cleaning `original` uv-data to {}".format(original_cc_fits_path))
        clean_difmap(uv_fits_fname, 'original_cc.fits', stokes, imsize,
                     path=uv_fits_dir, path_to_script=path_to_script,
                     outpath=outdir)

    original_uv_data = UVData(original_uv_fits_path)
    # Noise estimate of the original observation - re-used for every sample
    noise = original_uv_data.noise()
    original_model = create_model_from_fits_file(original_cc_fits_path)

    # Find images parameters for cleaning if necessary (only possible when a
    # ready CC FITS file was supplied, otherwise ``imsize`` was required above)
    if imsize is None:
        print("Getting image parameters from `original`"
              " CLEAN FITS file {}.".format(original_cc_fits_path))
        image_params = get_fits_image_info(original_cc_fits_path)
        imsize = (image_params['imsize'][0],
                  abs(image_params['pixsize'][0]) / mas_to_rad)

    # Substitute uv-data with original model and create `model` uv-data
    print("Substituting `original` uv-data with CLEAN model...")
    model_uv_data = copy.deepcopy(original_uv_data)
    model_uv_data.substitute([original_model])

    # Create `sample` uv-data
    # Add noise to `model` uv-data ``n_cov`` times and get ``n_cov`` `samples`
    # from population
    sample_uv_fits_paths = list()
    print("Creating {} `samples` from population".format(n_sample))
    for i in range(n_sample):
        sample_uv_data = copy.deepcopy(model_uv_data)
        sample_uv_data.noise_add(noise)
        # Zero-padded index keeps lexicographic == numeric order for globbing
        sample_uv_fits_path = os.path.join(
            outdir, 'sample_uv_{}.uvf'.format(str(i + 1).zfill(3)))
        sample_uv_data.save(sample_uv_fits_path)
        sample_uv_fits_paths.append(sample_uv_fits_path)

    # Clean each `sample` FITS-file
    print("CLEANing `samples` uv-data")
    for uv_fits_path in sample_uv_fits_paths:
        uv_fits_dir, uv_fits_fname = os.path.split(uv_fits_path)
        # Recover the zero-padded sample index from the file name
        j = uv_fits_fname.split('.')[0].split('_')[-1]
        print("Cleaning {} sample uv-data to"
              " {}".format(uv_fits_path,
                           os.path.join(outdir,
                                        'sample_cc_{}.fits'.format(j))))
        clean_difmap(uv_fits_fname, 'sample_cc_{}.fits'.format(j),
                     original_model.stokes, imsize, path=uv_fits_dir,
                     path_to_script=path_to_script, outpath=outdir)

    # Re-glob so results are sorted and reflect what is actually on disk
    sample_cc_fits_paths = sorted(
        glob.glob(os.path.join(outdir, 'sample_cc_*.fits')))
    sample_uv_fits_paths = sorted(
        glob.glob(os.path.join(outdir, 'sample_uv_*.uvf')))
    return sample_uv_fits_paths, sample_cc_fits_paths
def fit_model_with_nestle(uv_fits, model_file, components_priors, outdir=None,
                          stokes='I', **nestle_kwargs):
    """
    Fit a difmap model to uv-data using nested sampling (``nestle``).

    :param uv_fits:
        Path to uv-fits file with self-calibrated visibilities.
    :param model_file:
        Path to file with difmap model.
    :param components_priors:
        Components prior's ppf. Close to phase center component goes first.
        Iterable of dicts with keys - name of the parameter and values -
        (callable, args, kwargs,) where args & kwargs - additional arguments to
        callable. Each callable is called callable.ppf(p, *args, **kwargs).
        Thus callable should has ``ppf`` method.

        Example of prior on single component:
            {'flux': (scipy.stats.uniform.ppf, [0., 10.], dict(),),
             'bmaj': (scipy.stats.uniform.ppf, [0, 5.], dict(),),
             'e': (scipy.stats.beta.ppf, [alpha, beta], dict(),)}
        First key will result in calling: scipy.stats.uniform.ppf(u, 0, 10) as
        value from prior for ``flux`` parameter.
    :param outdir: (optional)
        Directory to output results. If ``None`` then use cwd. (default:
        ``None``)
    :param stokes: (optional)
        Stokes parameter of the model. (default: ``'I'``)
    :param nestle_kwargs: (optional)
        Any arguments passed to ``nestle.sample`` function.

    :return
        Results of ``nestle.sample`` work on that model.
    """
    if outdir is None:
        outdir = os.getcwd()
    mdl_file = model_file
    uv_data = UVData(uv_fits)
    mdl_dir, mdl_fname = os.path.split(mdl_file)
    comps = import_difmap_model(mdl_fname, mdl_dir)
    # Sort components by distance from phase center so the prior dicts in
    # ``components_priors`` (phase-center-first) line up with the components.
    comps = sorted(comps, key=lambda x: np.sqrt(x.p[1]**2 + x.p[2]**2))

    # Build the list of per-parameter prior ppf callables, in the fixed
    # parameter order used by the model components.
    ppfs = list()
    labels = list()
    for component_prior in components_priors:
        for comp_name in ('flux', 'x', 'y', 'bmaj', 'e', 'bpa'):
            try:
                ppfs.append(_function_wrapper(*component_prior[comp_name]))
                labels.append(comp_name)
            except KeyError:
                # Parameter not present for this component (e.g. circular
                # gaussian has no ``e``/``bpa``) - simply skip it.
                pass

    for ppf in ppfs:
        print(ppf.args)

    hypercube = hypercube_partial(ppfs)

    # Create model
    # FIX: ``stokes`` was previously an undefined name here (NameError);
    # it is now a keyword parameter with the conventional default 'I'.
    mdl = Model(stokes=stokes)
    # Add components to model
    mdl.add_components(*comps)
    loglike = LnLikelihood(uv_data, mdl)
    time0 = time.time()
    result = nestle.sample(loglikelihood=loglike, prior_transform=hypercube,
                           ndim=mdl.size, npoints=50, method='multi',
                           callback=nestle.print_progress, **nestle_kwargs)
    print("Time spent : {}".format(time.time() - time0))
    # Resample to equally-weighted posterior samples
    samples = nestle.resample_equal(result.samples, result.weights)
    # Save re-weighted samples from posterior to specified ``outdir``
    # directory
    np.savetxt(os.path.join(outdir, 'samples.txt'), samples)
    fig = corner.corner(samples, show_titles=True, labels=labels,
                        quantiles=[0.16, 0.5, 0.84], title_fmt='.3f')
    # Save corner plot of samples from posterior to specified ``outdir``
    # directory
    fig.savefig(os.path.join(outdir, "corner.png"), bbox_inches='tight',
                dpi=200)
    return result
def create_sample(original_uv_fits, original_mdl_file, outdir=None,
                  n_sample=100, stokes='I'):
    """
    Create `sample` from `true` or `model` source

    :param original_uv_fits:
        Path to original FITS-file with uv-data.
    :param original_mdl_file:
        Path to file with difmap model of the `original` source.
    :param outdir: (optional)
        Directory to store intermediate results. If ``None`` then use CWD.
        (default: ``None``)
    :param n_sample: (optional)
        Number of `samples` from infinite population to consider in coverage
        analysis of intervals. Here `samples` - observations of known source
        with different realisations of noise with known parameters. (default:
        ``100``)
    :param stokes: (optional)
        Stokes parameter to use. (default: ``'I'``)
    :return:
        Two sorted lists of paths: (`sample` uv-FITS files, fitted difmap
        model files).
    """
    # FIX: the documented ``outdir=None`` -> CWD behavior was missing, so the
    # default previously crashed in ``os.path.join(outdir, ...)`` below
    # (same guard as in ``fit_model_with_nestle``).
    if outdir is None:
        outdir = os.getcwd()

    original_uv_data = UVData(original_uv_fits)
    # Noise estimate of the original observation - re-used for every sample
    noise = original_uv_data.noise()
    path, _ = os.path.split(original_mdl_file)
    comps = import_difmap_model(original_mdl_file, path)
    original_model = Model(stokes=stokes)
    original_model.add_components(*comps)

    # Substitute uv-data with original model and create `model` uv-data
    print("Substituting `original` uv-data with CLEAN model...")
    model_uv_data = copy.deepcopy(original_uv_data)
    model_uv_data.substitute([original_model])

    # Create `sample` uv-data
    # Add noise to `model` uv-data ``n_cov`` times and get ``n_cov`` `samples`
    # from population
    sample_uv_fits_paths = list()
    print("Creating {} `samples` from population".format(n_sample))
    for i in range(n_sample):
        sample_uv_data = copy.deepcopy(model_uv_data)
        sample_uv_data.noise_add(noise)
        # Zero-padded index keeps lexicographic == numeric order for globbing
        sample_uv_fits_path = os.path.join(
            outdir, 'sample_uv_{}.uvf'.format(str(i + 1).zfill(3)))
        sample_uv_data.save(sample_uv_fits_path)
        sample_uv_fits_paths.append(sample_uv_fits_path)

    # Fitting in difmap each `sample` FITS-file
    print("Fitting `samples` uv-data")
    for uv_fits_path in sample_uv_fits_paths:
        uv_fits_dir, uv_fits_fname = os.path.split(uv_fits_path)
        # Recover the zero-padded sample index from the file name
        j = uv_fits_fname.split('.')[0].split('_')[-1]
        print("Fitting {} sample uv-data to"
              " {}".format(uv_fits_path,
                           os.path.join(outdir,
                                        'sample_model_{}.mdl'.format(j))))
        # NOTE(review): ``mdl_path=uv_fits_dir`` assumes the original model
        # file is reachable relative to the sample directory - confirm
        # against ``modelfit_difmap``'s path handling.
        modelfit_difmap(uv_fits_fname, original_mdl_file,
                        'sample_model_{}.mdl'.format(j),
                        path=uv_fits_dir, mdl_path=uv_fits_dir,
                        out_path=uv_fits_dir)

    # Re-glob so results are sorted and reflect what is actually on disk
    sample_mdl_paths = sorted(
        glob.glob(os.path.join(outdir, 'sample_model_*.mdl')))
    sample_uv_fits_paths = sorted(
        glob.glob(os.path.join(outdir, 'sample_uv_*.uvf')))
    return sample_uv_fits_paths, sample_mdl_paths
class KFoldCV(object):
    """
    K-fold cross-validation of interferometric uv-data.

    Folds are created per baseline so each train/test split contains data
    from every baseline that has enough positive-weight samples; baselines
    excluded from folding contribute all their data to the train set only.
    """
    def __init__(self, uv_fits_path, k, basename='cv', seed=None,
                 baselines=None, stokes='I'):
        # Only total intensity or single circular polarizations are handled
        if stokes not in ('I', 'RR', 'LL'):
            raise Exception("Only stokes (I, RR, LL) supported!")
        self.stokes = stokes
        self.uv_fits_path = uv_fits_path
        self.uvdata = UVData(uv_fits_path)
        self.k = k
        self.seed = seed
        self.basename = basename
        # Base names for the per-split FITS files written by
        # ``create_train_test_data`` (suffixed with '_{i}.fits')
        self.test_fname_base = "{}_test".format(basename)
        self.train_fname_base = "{}_train".format(basename)
        self.baseline_folds = None
        self.create_folds(baselines)

    def create_folds(self, baselines=None):
        """
        Build ``self.baseline_folds``: dict mapping baseline -> list of
        ``k`` (train_mask, test_mask) boolean-array pairs over the uv-data
        rows.

        :param baselines: (optional)
            Iterable of baselines to fold. If ``None`` then fold all
            baselines present in the uv-data. (default: ``None``)
        """
        baseline_folds = dict()

        if baselines is None:
            baselines = self.uvdata.baselines

        # Map requested stokes to the hands used for positive-weight checks
        if self.stokes == 'I':
            stokes = ['RR', 'LL']
            average_stokes = True
        elif self.stokes == 'RR':
            stokes = ['RR']
            average_stokes = False
        elif self.stokes == 'LL':
            stokes = ['LL']
            average_stokes = False
        else:
            raise Exception("Only stokes (I, RR, LL) supported!")

        for bl in baselines:
            # Boolean mask of uv-data rows belonging to this baseline
            bl_indxs = self.uvdata._indxs_baselines[bl]
            print("Baseline {} has {}"
                  " samples".format(bl, np.count_nonzero(bl_indxs)))
            # Restrict to rows with positive weight in the relevant hands
            bl_indxs_pw = self.uvdata.pw_indxs_baseline(
                bl, average_bands=True, stokes=stokes,
                average_stokes=average_stokes)
            bl_indxs = mask_boolean_with_boolean(bl_indxs, bl_indxs_pw)
            print("Baseline {} has {} samples with"
                  " positive weight".format(bl, np.count_nonzero(bl_indxs)))
            try:
                # NOTE(review): ``random_state`` has no effect (and newer
                # sklearn versions reject it) when ``shuffle=False`` -
                # confirm the intended sklearn version.
                kfold = KFold(self.k, shuffle=False, random_state=self.seed)
                baseline_folds[bl] = list()
                # NOTE(review): sklearn's ``KFold.split`` expects an
                # array-like ``X``, not an integer count - confirm this call
                # works with the sklearn version in use.
                for train, test in kfold.split(np.count_nonzero(bl_indxs)):
                    # Map fold positions (within this baseline's positive
                    # rows) back to full-length boolean masks
                    tr = to_boolean_array(
                        np.nonzero(bl_indxs)[0][train], len(bl_indxs))
                    te = to_boolean_array(
                        np.nonzero(bl_indxs)[0][test], len(bl_indxs))
                    baseline_folds[bl].append((tr, te))
            # When ``k`` more then number of baseline samples
            except ValueError:
                pass

        # Add all other baselines data w/o folding - all data to train & nothing
        # to test
        rest_baselines = list(self.uvdata.baselines)
        for bl in baselines:
            rest_baselines.remove(bl)
        for bl in rest_baselines:
            baseline_folds[bl] = list()
        for bl in rest_baselines:
            bl_indxs = self.uvdata._indxs_baselines[bl]
            for k in range(self.k):
                # Train mask = all baseline rows; test mask = empty
                baseline_folds[bl].append(
                    (bl_indxs, np.zeros(len(bl_indxs), dtype=bool)))
        self.baseline_folds = baseline_folds

    def create_train_test_data(self, outdir=None):
        """
        Write the ``k`` train/test uv-data splits to FITS files in
        ``outdir`` (CWD if ``None``), combining the per-baseline fold masks.
        """
        if outdir is None:
            outdir = os.getcwd()
        for i in range(self.k):
            # Float zero arrays; ``np.logical_or`` below yields boolean masks
            train_indxs = np.zeros(len(self.uvdata.hdu.data))
            test_indxs = np.zeros(len(self.uvdata.hdu.data))
            for bl, kfolds in self.baseline_folds.items():
                itrain, itest = kfolds[i]
                # itrain = to_boolean_array(itrain)
                train_indxs = np.logical_or(train_indxs, itrain)
                test_indxs = np.logical_or(test_indxs, itest)
            train_data = self.uvdata.hdu.data[train_indxs]
            test_data = self.uvdata.hdu.data[test_indxs]
            self.uvdata.save(os.path.join(
                outdir, self.test_fname_base + '_{}.fits'.format(i)),
                test_data, rewrite=True)
            self.uvdata.save(os.path.join(
                outdir, self.train_fname_base + '_{}.fits'.format(i)),
                train_data, rewrite=True)

    def cv_score(self, initial_dfm_model_path=None, data_dir=None, niter=100,
                 path_to_script=None, mapsize_clean=None):
        """
        Fit each train split and score the resulting model on both train and
        test splits.

        :param initial_dfm_model_path: (optional)
            Path to difmap model used as a starting point for ``modelfit``.
            If ``None`` then CLEAN (``clean_n``) is used instead. (default:
            ``None``)
        :param data_dir: (optional)
            Directory with the train/test FITS files written by
            ``create_train_test_data``. CWD if ``None``. (default: ``None``)
        :param niter:
            Number of modelfit/CLEAN iterations. (default: ``100``)
        :param path_to_script: (optional)
            Path to CLEAN script (used only in the CLEAN branch).
        :param mapsize_clean: (optional)
            (image size [pix], pixel size [mas]) for CLEAN branch.
        :return:
            Tuple of two lists - (cv_scores, train_scores), one entry per
            split.
        """
        if data_dir is None:
            data_dir = os.getcwd()
        # Sorted globs pair up because splits share the same index suffix
        train_uv_fits_paths = sorted(
            glob.glob(os.path.join(data_dir, self.train_fname_base + '*')))
        test_uv_fits_paths = sorted(
            glob.glob(os.path.join(data_dir, self.test_fname_base + '*')))
        cv_scores = list()
        train_scores = list()
        if initial_dfm_model_path is not None:
            # Modelfit branch: refit the difmap model on each train split
            for i, (train_uv_fits_path, test_uv_fits_path) in enumerate(
                    zip(train_uv_fits_paths, test_uv_fits_paths)):
                print("Calculating CV-score for {} of {} splits".format(
                    i + 1, self.k))
                print("Training FITS: {}".format(train_uv_fits_path))
                print("Testing FITS: {}".format(test_uv_fits_path))
                out_mdl_fname = 'train_{}.mdl'.format(i)
                dfm_model_dir, dfm_model_fname = os.path.split(
                    initial_dfm_model_path)
                # NOTE(review): ``train_uv_fits_path`` is a full path while
                # ``path=data_dir`` is also given - confirm
                # ``modelfit_difmap`` handles this combination.
                modelfit_difmap(train_uv_fits_path, dfm_model_fname,
                                out_mdl_fname, niter=niter, path=data_dir,
                                mdl_path=dfm_model_dir, out_path=data_dir,
                                stokes=self.stokes, show_difmap_output=True)
                cv_scores.append(
                    score(test_uv_fits_path,
                          os.path.join(data_dir, out_mdl_fname)))
                train_scores.append(
                    score(train_uv_fits_path,
                          os.path.join(data_dir, out_mdl_fname)))
        else:
            # CLEAN branch: image each train split with a fixed number of
            # iterations
            for i, (train_uv_fits_path, test_uv_fits_path) in enumerate(
                    zip(train_uv_fits_paths, test_uv_fits_paths)):
                out_mdl_fname = 'train_{}.fits'.format(i)
                # This used when learning curves are created
                # clean_difmap(train_uv_fits_path, out_mdl_fname, 'I',
                #              mapsize_clean, data_dir, path_to_script,
                #              outpath=data_dir, show_difmap_output=True)
                # This used when different number of iterations are tested
                clean_n(
                    train_uv_fits_path, out_mdl_fname, 'I', mapsize_clean,
                    niter=niter, path_to_script=path_to_script,
                    outpath=data_dir, show_difmap_output=True,
                )
                cv_scores.append(
                    score(test_uv_fits_path,
                          os.path.join(data_dir, out_mdl_fname)))
                train_scores.append(
                    score(train_uv_fits_path,
                          os.path.join(data_dir, out_mdl_fname)))
        return cv_scores, train_scores
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k'] # Workflow for one source source = '0945+408' epoch = '2007_04_18' band = 'u' # TODO: Standard it image_fname = 'original_cc.fits' uv_fname_cc = '0945+408.u.2007_04_18.uvf' uv_fname_uv = '0945+408.u.2007_04_18.uvf' dfm_model_fname = 'dfmp_original_model.mdl' comps = import_difmap_model(dfm_model_fname, base_path) model_uv = Model(stokes='I') model_uv.add_components(*comps) uvdata = UVData(os.path.join(base_path, uv_fname_uv)) uvdata_m = UVData(os.path.join(base_path, uv_fname_uv)) uvdata_m.substitute([model_uv]) uvdata_r = uvdata - uvdata_m # Plot uv-data label_size = 12 matplotlib.rcParams['xtick.labelsize'] = label_size matplotlib.rcParams['ytick.labelsize'] = label_size uvdata.uvplot(style='re&im', freq_average=True) matplotlib.pyplot.show() matplotlib.pyplot.savefig('/home/ilya/sandbox/heteroboot/uvdata_original.png', bbox_inches='tight', dpi=400) matplotlib.pyplot.close() # # Plot residuals in radplot
cv_scores = list() train_scores = list() for i, fname in enumerate(['1IF.fits', '12IF.fits', '123IF.fits', '1234IF.fits', '12345IF.fits', '123456IF.fits', '1234567IF.fits']): current_fits = os.path.join(data_dir, fname) modelfit_difmap(current_fits, original_model_fname, 'out_{}.mdl'.format(i), path=data_dir, mdl_path=data_dir, out_path=data_dir, niter=100) comps = import_difmap_model('out_{}.mdl'.format(i), data_dir) model = Model(stokes='I') model.add_components(*comps) # Calculate performance on training data uvdata_train_model = UVData(current_fits) uvdata_train = UVData(current_fits) uvdata_train_model.substitute([model]) uvdata_diff_train = uvdata_train - uvdata_train_model factor = np.count_nonzero(~uvdata_diff_train.uvdata_weight_masked.mask[:, :, :2]) squared_diff = uvdata_diff_train.uvdata_weight_masked[:, :, :2] *\ uvdata_diff_train.uvdata_weight_masked[:, :, :2].conj() score = float(np.sum(squared_diff)) / factor train_scores.append(score) # Calculate performance on test data uvdata_test_model = UVData(os.path.join(data_dir, '8IF.fits')) uvdata_test = UVData(os.path.join(data_dir, '8IF.fits')) uvdata_test_model.substitute([model]) uvdata_diff_test = uvdata_test - uvdata_test_model
download_dir=data_dir) # Fetch model file get_mojave_mdl_file(tsv_table, source, epoch, outdir=data_dir) # Clean uv-fits clean_difmap(uv_fits, 'cc.fits', 'I', [1024, 0.1], path=data_dir, path_to_script=path_to_script, outpath=data_dir) # Create clean image instance cc_image = create_clean_image_from_fits_file(os.path.join(data_dir, 'cc.fits')) comps = import_difmap_model(mdl_fname, data_dir) model = Model(stokes='I') model.add_components(*comps) # Check that model fits UV-data well uv_data = UVData(os.path.join(data_dir, uv_fits)) uv_data.uvplot() mdl_data = copy.deepcopy(uv_data) mdl_data.substitute([model]) mdl_data.uvplot(sym='.r') cc_image_ = copy.deepcopy(cc_image) cc_image_._image = np.zeros(cc_image._image.shape, dtype=float) cc_image_.add_model(model) plt.figure() plt.matshow(cc_image_.cc_image - cc_image.cc_image) plt.colorbar()
components_priors.append({'flux': (sp.stats.uniform.ppf, [0, 1], {}), 'x': (sp.stats.uniform.ppf, [-2, 4], {}), 'y': (sp.stats.uniform.ppf, [-2, 4], {}), 'bmaj': (sp.stats.uniform.ppf, [0, 1], {})}) components_priors.append({'flux': (sp.stats.uniform.ppf, [0, 1], {}), 'x': (sp.stats.uniform.ppf, [-5, 10], {}), 'y': (sp.stats.uniform.ppf, [-5, 10], {}), 'bmaj': (sp.stats.uniform.ppf, [0, 2], {})}) components_priors.append({'flux': (sp.stats.uniform.ppf, [0, 1], {}), 'x': (sp.stats.uniform.ppf, [-6, 12], {}), 'y': (sp.stats.uniform.ppf, [-6, 12], {}), 'bmaj': (sp.stats.uniform.ppf, [0, 3], {})}) results = fit_model_with_nestle(uv_fits, mdl_file, components_priors, outdir=outdir) data_dir = '/home/ilya/code/vlbi_errors/silke' # uv_fits = '0851+202.u.2012_11_11.uvf' uv_fits = '0851+202.u.2004_11_05.uvf' # mdl_fname = '2.mod.2012_11_11' mdl_fname = '1.mod.2004_11_05' uv_data = UVData(os.path.join(data_dir, uv_fits)) comps = import_difmap_model(mdl_fname, data_dir) model = Model(stokes='I') model.add_components(*comps) fig = uv_data.uvplot(style='a&p') uv_data.substitute([model]) uv_data.uvplot(color='r', fig=fig, phase_range=[-0.2, 0.2])