class KFoldCV(object):
    """Per-baseline K-fold splitter for a UV-FITS data set.

    The samples of every baseline are split into ``k`` folds independently.
    Iterating over the instance writes, for each fold, one training and one
    testing file and yields their names. The same two file names are reused
    (overwritten) on every iteration.

    :param fname:
        Path of the UV-FITS file; passed to ``UVData``.
    :param k:
        Number of folds.
    :param basename: (optional)
        Prefix of the generated ``<basename>_train.FITS`` and
        ``<basename>_test.FITS`` files. (default: ``cv``)
    :param seed: (optional)
        Passed to ``KFold`` as ``random_state``. (default: ``None``)
    """

    def __init__(self, fname, k, basename='cv', seed=None):
        self.fname = fname
        self.uvdata = UVData(fname)
        self.k = k
        self.seed = seed
        self.basename = basename
        self.test_fname = "{}_test.FITS".format(basename)
        self.train_fname = "{}_train.FITS".format(basename)
        # Maps baseline -> list of ``k`` (train_mask, test_mask) tuples
        self.baseline_folds = None
        self.create_folds()

    def create_folds(self):
        """Build ``self.baseline_folds`` from the baseline index masks.

        Baselines for which ``KFold`` raises ``ValueError`` (``k`` larger
        than the number of baseline samples) are left out completely, so
        they end up in neither the training nor the testing data.
        """
        baseline_folds = dict()
        for bl, indxs in self.uvdata._indxs_baselines.items():
            n_samples = np.count_nonzero(indxs)
            print("Baseline {} has {} samples".format(bl, n_samples))
            # Positions of this baseline's samples in the full data array
            positions = np.nonzero(indxs)[0]
            n_total = len(indxs)
            try:
                # NOTE(review): this matches the legacy scikit-learn
                # ``KFold(n, n_folds, ...)`` call signature - confirm import.
                kfold = KFold(n_samples, self.k, shuffle=True,
                              random_state=self.seed)
                folds = [(to_boolean_array(positions[train], n_total),
                          to_boolean_array(positions[test], n_total))
                         for train, test in kfold]
            # When ``k`` is more than the number of baseline samples
            except ValueError:
                continue
            # Register the baseline only when all its folds were built
            baseline_folds[bl] = folds
        self.baseline_folds = baseline_folds

    def __iter__(self):
        """Write and yield ``(train_fname, test_fname)`` for each fold."""
        for i in range(self.k):
            n_records = len(self.uvdata.hdu.data)
            # Boolean masks: with no usable baselines they select nothing
            # instead of being misinterpreted as (float) index arrays.
            train_indxs = np.zeros(n_records, dtype=bool)
            test_indxs = np.zeros(n_records, dtype=bool)
            for kfolds in self.baseline_folds.values():
                itrain, itest = kfolds[i]
                train_indxs = np.logical_or(train_indxs, itrain)
                test_indxs = np.logical_or(test_indxs, itest)
            train_data = self.uvdata.hdu.data[train_indxs]
            test_data = self.uvdata.hdu.data[test_indxs]
            self.uvdata.save(self.test_fname, test_data, rewrite=True)
            self.uvdata.save(self.train_fname, train_data, rewrite=True)
            yield self.train_fname, self.test_fname
class KFoldCV(object):
    """Per-baseline K-fold cross-validation of a UV-FITS data set.

    Samples with positive weight on the selected baselines are split into
    ``k`` folds (without shuffling). All other baselines are put entirely
    into the training data of every fold.

    :param uv_fits_path:
        Path of the UV-FITS file; passed to ``UVData``.
    :param k:
        Number of folds.
    :param basename: (optional)
        Prefix of the generated ``<basename>_train_<i>.fits`` and
        ``<basename>_test_<i>.fits`` files. (default: ``cv``)
    :param seed: (optional)
        Stored on the instance. It is not passed to ``KFold`` because the
        folds are built with ``shuffle=False``, where a random state has no
        effect. (default: ``None``)
    :param baselines: (optional)
        Iterable of baselines to fold. If ``None`` then all baselines of the
        data set are folded. (default: ``None``)
    :param stokes: (optional)
        One of ``I``, ``RR`` or ``LL``. (default: ``I``)

    :raises ValueError:
        If ``stokes`` is not one of the supported values.
    """

    def __init__(self, uv_fits_path, k, basename='cv', seed=None,
                 baselines=None, stokes='I'):
        if stokes not in ('I', 'RR', 'LL'):
            raise ValueError("Only stokes (I, RR, LL) supported!")
        self.stokes = stokes
        self.uv_fits_path = uv_fits_path
        self.uvdata = UVData(uv_fits_path)
        self.k = k
        self.seed = seed
        self.basename = basename
        self.test_fname_base = "{}_test".format(basename)
        self.train_fname_base = "{}_train".format(basename)
        # Maps baseline -> list of ``k`` (train_mask, test_mask) tuples
        self.baseline_folds = None
        self.create_folds(baselines)

    def create_folds(self, baselines=None):
        """Build ``self.baseline_folds``.

        :param baselines: (optional)
            Iterable of baselines to fold. If ``None`` then all baselines of
            the data set are used. (default: ``None``)

        A selected baseline that can not be split (``KFold`` raises
        ``ValueError``, e.g. ``k`` is larger than its number of
        positive-weight samples) is reported and left out of both the
        training and the testing data.
        """
        if baselines is None:
            baselines = self.uvdata.baselines
        # Materialize once: the collection is traversed and searched below
        baselines = list(baselines)

        if self.stokes == 'I':
            stokes, average_stokes = ['RR', 'LL'], True
        elif self.stokes in ('RR', 'LL'):
            stokes, average_stokes = [self.stokes], False
        else:
            raise ValueError("Only stokes (I, RR, LL) supported!")

        baseline_folds = dict()
        for bl in baselines:
            bl_indxs = self.uvdata._indxs_baselines[bl]
            print("Baseline {} has {}"
                  " samples".format(bl, np.count_nonzero(bl_indxs)))
            bl_indxs_pw = self.uvdata.pw_indxs_baseline(
                bl, average_bands=True, stokes=stokes,
                average_stokes=average_stokes)
            bl_indxs = mask_boolean_with_boolean(bl_indxs, bl_indxs_pw)
            print("Baseline {} has {} samples with"
                  " positive weight".format(bl, np.count_nonzero(bl_indxs)))

            # Positions of the usable samples in the full data array
            positions = np.nonzero(bl_indxs)[0]
            n_total = len(bl_indxs)
            try:
                # ``random_state`` is deliberately not passed: without
                # shuffling it has no effect and recent scikit-learn
                # versions reject the combination with ``ValueError``.
                kfold = KFold(self.k, shuffle=False)
                # ``split`` needs an array-like (not a sample count); the
                # returned indexes refer to elements of ``positions``.
                folds = [(to_boolean_array(positions[train], n_total),
                          to_boolean_array(positions[test], n_total))
                         for train, test in kfold.split(positions)]
            # When ``k`` is more than the number of baseline samples
            except ValueError as e:
                print("Skipping baseline {}: {}".format(bl, e))
                continue
            # Register the baseline only when all ``k`` folds exist, so
            # that consumers can safely index ``kfolds[i]``.
            baseline_folds[bl] = folds

        # Add all other baselines data w/o folding - all data to train &
        # nothing to test
        for bl in self.uvdata.baselines:
            if bl in baselines:
                continue
            bl_indxs = self.uvdata._indxs_baselines[bl]
            baseline_folds[bl] = [
                (bl_indxs, np.zeros(len(bl_indxs), dtype=bool))
                for _ in range(self.k)
            ]

        self.baseline_folds = baseline_folds

    def create_train_test_data(self, outdir=None):
        """Save the training and testing data of every fold.

        Files are named ``<basename>_train_<i>.fits`` and
        ``<basename>_test_<i>.fits`` with ``i`` running over the folds.

        :param outdir: (optional)
            Directory to save the files to. If ``None`` then the current
            working directory is used. (default: ``None``)
        """
        if outdir is None:
            outdir = os.getcwd()
        for i in range(self.k):
            n_records = len(self.uvdata.hdu.data)
            # Boolean masks: with no folds at all they select nothing
            # instead of being misinterpreted as (float) index arrays.
            train_indxs = np.zeros(n_records, dtype=bool)
            test_indxs = np.zeros(n_records, dtype=bool)
            for kfolds in self.baseline_folds.values():
                itrain, itest = kfolds[i]
                train_indxs = np.logical_or(train_indxs, itrain)
                test_indxs = np.logical_or(test_indxs, itest)
            train_data = self.uvdata.hdu.data[train_indxs]
            test_data = self.uvdata.hdu.data[test_indxs]
            self.uvdata.save(
                os.path.join(outdir,
                             self.test_fname_base + '_{}.fits'.format(i)),
                test_data, rewrite=True)
            self.uvdata.save(
                os.path.join(outdir,
                             self.train_fname_base + '_{}.fits'.format(i)),
                train_data, rewrite=True)

    def cv_score(self, initial_dfm_model_path=None, data_dir=None, niter=100,
                 path_to_script=None, mapsize_clean=None):
        """Fit a model to each training file and score it.

        :param initial_dfm_model_path: (optional)
            Path to the initial difmap model. If given, ``modelfit_difmap``
            is used for fitting; if ``None``, ``clean_n`` is used instead.
            (default: ``None``)
        :param data_dir: (optional)
            Directory that holds the train/test files and receives the
            fitted models. If ``None`` then the current working directory is
            used. (default: ``None``)
        :param niter: (optional)
            Number of iterations passed to the fitting routine.
            (default: ``100``)
        :param path_to_script: (optional)
            Passed to ``clean_n``. (default: ``None``)
        :param mapsize_clean: (optional)
            Passed to ``clean_n``. (default: ``None``)

        :return:
            Tuple ``(cv_scores, train_scores)`` of lists with the value of
            ``score`` for the testing and the training file of every pair.
        """
        if data_dir is None:
            data_dir = os.getcwd()
        # Train and test files are paired by their lexicographic order
        train_uv_fits_paths = sorted(
            glob.glob(os.path.join(data_dir, self.train_fname_base + '*')))
        test_uv_fits_paths = sorted(
            glob.glob(os.path.join(data_dir, self.test_fname_base + '*')))

        use_modelfit = initial_dfm_model_path is not None
        if use_modelfit:
            dfm_model_dir, dfm_model_fname = os.path.split(
                initial_dfm_model_path)

        cv_scores = list()
        train_scores = list()
        for i, (train_uv_fits_path, test_uv_fits_path) in enumerate(
                zip(train_uv_fits_paths, test_uv_fits_paths)):
            if use_modelfit:
                print("Calculating CV-score for {} of {} splits".format(
                    i + 1, self.k))
                print("Training FITS: {}".format(train_uv_fits_path))
                print("Testing FITS: {}".format(test_uv_fits_path))
                out_mdl_fname = 'train_{}.mdl'.format(i)
                # NOTE(review): a full path is passed together with
                # ``path=data_dir`` - confirm ``modelfit_difmap`` copes
                # with a relative ``data_dir``.
                modelfit_difmap(train_uv_fits_path, dfm_model_fname,
                                out_mdl_fname, niter=niter, path=data_dir,
                                mdl_path=dfm_model_dir, out_path=data_dir,
                                stokes=self.stokes, show_difmap_output=True)
            else:
                out_mdl_fname = 'train_{}.fits'.format(i)
                # NOTE(review): stokes is fixed to 'I' here regardless of
                # ``self.stokes`` - confirm this is intended.
                clean_n(train_uv_fits_path, out_mdl_fname, 'I',
                        mapsize_clean, niter=niter,
                        path_to_script=path_to_script, outpath=data_dir,
                        show_difmap_output=True)
            out_mdl_path = os.path.join(data_dir, out_mdl_fname)
            cv_scores.append(score(test_uv_fits_path, out_mdl_path))
            train_scores.append(score(train_uv_fits_path, out_mdl_path))

        return cv_scores, train_scores
for freq, uv_fits_fname in uv_fits_fnames.items(): uv_fits_path = os.path.join(data_dir, uv_fits_fname) cg1 = CGComponent(2.0, 0., 0., 0.2) cg2 = CGComponent(1.0, 0., 0.3, 0.3) cg3 = CGComponent(0.5, 0., 1.5, 0.4) mdl = Model(stokes='I') mdl.add_components(cg1, cg2, cg3) uvdata = UVData(uv_fits_path) noise = uvdata.noise() for i in range(1, 101): uvdata = UVData(uv_fits_path) uvdata.substitute([mdl]) uvdata.noise_add(noise) art_fits_fname = 'art_{}_{}.fits'.format(freq, i) art_fits_path = os.path.join(data_dir, art_fits_fname) uvdata.save(art_fits_path) # Here we should MCMC posterior modelfit_difmap(art_fits_fname, 'initial.mdl', 'out_{}_{}.mdl'.format(freq, i), niter=100, path=data_dir, mdl_path=data_dir, out_path=data_dir) params = list() for i in range(1, 101): comps = import_difmap_model('out_{}_{}.mdl'.format(freq, i), data_dir) params.append([ comps[0].p[0], comps[0].p[2], comps[1].p[0], comps[1].p[2],
# model.add_components(ccmodel) if stokes == "I": use_V = True else: use_V = False noise = uvdata_template.noise(use_V=use_V) params = list() for i in range(n_mc): uvdata_template.substitute([ccmodel]) uvdata_template.uvdata = uvdata_template.uvdata * corrections uvdata_template.noise_add(noise) uvdata_template.save(os.path.join(data_dir, "artificial.uvf"), rewrite=True, downscale_by_freq=True) # Self-calibrate selfcal_difmap( fname="artificial.uvf", outfname="artificial.uvf", path=data_dir, path_to_script="/home/ilya/github/ve/difmap/auto_selfcal", outpath=data_dir, show_difmap_output=True) modelfit_difmap("artificial.uvf", "{}.mod".format(epoch), "boot_artificial.mdl", niter=300,
for path in original_dfm_models: fname = os.path.split(path)[-1] epoch = fname[:-4] print("Processing epoch : {}".format(epoch)) if epoch in epochs_ready: print("Skipping epoch {}".format(epoch)) continue original_model_fname = fname original_model_path = os.path.join(data_dir, original_model_fname) uv_fits_fname = mojave_uv_fits_fname('0316+413', 'u', epoch) uv_fits_path = os.path.join(data_dir, uv_fits_fname) uvdata = UVData(uv_fits_path) try: del uvdata.hdu.header['HISTORY'] uvdata.save(rewrite=True, downscale_by_freq=False) except KeyError: pass out_txt_file = os.path.join(txt_file_dir, 'errors_{}.mod'.format(epoch)) out_png_file = os.path.join(txt_file_dir, 'errors_{}.png'.format(epoch)) try: bootstrap_uvfits_with_difmap_model(uv_fits_path, original_model_path, n_boot=100, boot_dir=boot_dir, out_txt_file=out_txt_file, out_plot_file=out_png_file, clean_after=True, niter=200, out_rchisq_file=os.path.join(
# Rescale the coordinate arrays in place by ``mas_to_rad``
x *= mas_to_rad
y *= mas_to_rad

# Image-plane model component built from the ``transfer`` image
image = transfer.image()
icomp = ImageComponent(image, x[0], y[..., 0])

template_uvf_path = os.path.join(data_dir, '0952+179.U1.2007_04_30.PINAL')
uvdata = UVData(template_uvf_path)
# Alternative template that was used before:
# uvdata = UVData('/home/ilya/Dropbox/ACC/3c120/uvdata/0430+052.u.2006_05_24.uvf')

model = Model(stokes='I')
model.add_component(icomp)

# The noise is taken from the template before its visibilities are replaced
# by the model ones; it is then scaled by a factor of 10 before being added.
noise = uvdata.noise(use_V=True)
uvdata.substitute([model])
for bl in noise:
    noise[bl] *= 10
uvdata.noise_add(noise)
simulated_uvf_path = os.path.join(data_dir, '0952_15GHz_BK.fits')
uvdata.save(simulated_uvf_path)

# The CLEAN image read below is not produced here. The call that was used
# for imaging is kept for reference (note that its file names differ from
# the ones used in this script):
# clean_difmap('15GHz_BK.fits', 'u_BK_cc.fits', 'I', (1024, 0.1), path=data_dir,
#              path_to_script=path_to_script, show_difmap_output=True,
#              outpath=data_dir)
cc_fits_path = os.path.join(data_dir, '0952_15GHz_BK_cc.fits')
ccimage = create_clean_image_from_fits_file(cc_fits_path)
beam = ccimage.beam
rms = rms_image(ccimage)
blc, trc = find_bbox(ccimage.image, rms, 10)
plot_options = dict(
    x=ccimage.x,
    y=ccimage.y,
    min_abs_level=3*rms,
    beam=beam,
    show_beam=True,
    blc=blc,
    trc=trc,
    core=tuple(p_map),
)
iplot(ccimage.image, **plot_options)

# ``r`` values, their offsets relative to the last element and ``nu`` values
r = np.array([9.86391978e-01, 6.43996321e-01, 3.53391595e-01])
r_15 = r - r[-1]
nu = np.array([5., 8., 15.])
# Add extra noise to the UV-data and bootstrap the difmap model against it.
# Author's verdict: DOESNT WORK - ERRORS STILL SMALL
import os

from mojave import mojave_uv_fits_fname
from uv_data import UVData
from bootstrap import bootstrap_uvfits_with_difmap_model


data_dir = '/home/ilya/silke'
epoch = '2017_01_28'

# Difmap model to bootstrap
original_model_fname = '2017_01_28us'
original_model_path = os.path.join(data_dir, original_model_fname)

# Observed UV-data of the same epoch
uv_fits_fname = mojave_uv_fits_fname('0851+202', 'u', epoch)
uv_fits_path = os.path.join(data_dir, uv_fits_fname)
uvdata = UVData(uv_fits_path)

# The same noise value is added on every baseline
noise_per_baseline = dict(
    (baseline, [0.137]) for baseline in uvdata.baselines)
uvdata.noise_add(noise_per_baseline)

new_fits_path = os.path.join(data_dir, 'added_noise.fits')
uvdata.save(new_fits_path)

boot_dir = os.path.join(data_dir, 'boot')
bootstrap_uvfits_with_difmap_model(new_fits_path, original_model_path,
                                   boot_dir=boot_dir)
cg2 = CGComponent(0.5, 0., -2., 0.55) cg2.add_prior(flux=( sp.stats.uniform.logpdf, [0., 5.], dict(), ), bmaj=( sp.stats.uniform.logpdf, [0, 20.], dict(), )) model = Model(stokes='I') model.add_components(cg1, cg2) uvdata.substitute([model]) uvdata.noise_add(noise) uvdata.save(os.path.join(data_dir, 'fake.uvf')) # Clean uv-data clean_difmap( 'fake.uvf', 'fake_cc.fits', 'I', (1024, 0.1), path=data_dir, path_to_script='/home/ilya/code/vlbi_errors/difmap/final_clean_nw', outpath=data_dir, show_difmap_output=True) image = create_clean_image_from_fits_file( os.path.join(data_dir, 'fake_cc.fits')) rms = rms_image(image) blc, trc = find_bbox(image.image, 2. * rms, delta=int(image._beam.beam[0])) # Plot image iplot(image.image,
# Pixel grids of the model image. With the default ``xy`` indexing of
# ``np.meshgrid`` ``y`` varies along axis 1 and takes ``imsize[0]`` values,
# while ``z`` varies along axis 0 and takes ``imsize[1]`` values.
y, z = np.meshgrid(np.arange(imsize[0]), np.arange(imsize[1]))
# Put the origin at the image centre. Each axis must be centred using its
# own length - using ``imsize[0]`` for ``z`` is only right for square images.
y = y - imsize[0] / 2. + 0.5
z = z - imsize[1] / 2. + 0.5
# Pixels are scaled by ``mas_in_pix`` and then by ``mas_to_rad``
y_mas = y * mas_in_pix
z_mas = z * mas_in_pix
y_rad = mas_to_rad * y_mas
z_rad = mas_to_rad * z_mas

# Image component defined on the 1D coordinate axes of the grids
icomp = ImageComponent(image_g, y_rad[0, :], z_rad[:, 0])

# Use 10% of the noise found in the data
noise = uvdata.noise(use_V=True)
for key, value in noise.items():
    noise[key] = 0.1 * value

model = Model(stokes='I')
model.add_component(icomp)
# jet_comp = CGComponent(0.5, 1., 0., 0.3)
# model.add_component(jet_comp)

# Replace the visibilities with the model ones, add noise and save
uvdata.substitute([model])
uvdata.noise_add(noise)
uvdata.save('/home/ilya/github/bck/jetshow/uvf/test.fits', rewrite=True)

# Fit the difmap model ``initial_cg.mdl`` to the simulated data
modelfit_difmap('test.fits', 'initial_cg.mdl', 'out_test.mdl', niter=300,
                path='/home/ilya/github/bck/jetshow/uvf',
                mdl_path='/home/ilya/github/bck/jetshow',
                out_path='/home/ilya/github/bck/jetshow/uvf')