def run(self, modelcard=None, testcard=None, stokes='I'):
    """
    Cross-validate a set of image-plane models, obtained by modelling
    training samples, on the corresponding set of testing samples.

    Both wildcards are expanded with ``glob`` and the resulting file lists
    are sorted. The sorted model files are then split into consecutive
    groups of ``len(testfiles)`` files; the ``j``-th file of every group is
    scored against the ``j``-th testing sample.

    :param modelcard:
        Wildcard of file names ~ 'model_0i_0jofN.txt', where model in
        'model_0i_0jofN.txt' file is from modelling ``0j``-th training
        sample ('train_0jofN.FITS') with ``0i``-th model.
    :param testcard:
        Wildcard of file names ~ 'test_0jofN.FITS'.
    :param stokes: (optional)
        Stokes parameter passed both to the model loader and to the
        CV-score calculation. (default: ``'I'``)

    :return:
        List of lists ``['model#<i>', mean CV-score, std of CV-scores]``
        with one entry per group of models, where ``<i>`` counts from 1.

    :raise ValueError:
        If ``testcard`` matches no files or the number of model files is
        not a multiple of the number of testing files.
    """
    modelfiles = sorted(glob.glob(modelcard))
    testfiles = sorted(glob.glob(testcard))

    ntest = len(testfiles)
    # Validate before dividing: an empty testing set would otherwise surface
    # as a bare ZeroDivisionError.
    if ntest == 0:
        raise ValueError("No testing files match " + str(testcard))
    # Floor division keeps ``nmodels`` an integer on both Python 2 and 3, so
    # it stays usable in ``range`` and as a slice bound.
    nmodels, leftover = divmod(len(modelfiles), ntest)
    if leftover:
        raise ValueError("Number of model files (" + str(len(modelfiles)) +
                         ") is not a multiple of the number of testing"
                         " files (" + str(ntest) + ")")

    print("modelfiles : " + str(modelfiles))
    print("testfiles : " + str(testfiles))

    result = list()
    for i in range(nmodels):
        # Files of the i-th model, one per training sample.
        models = modelfiles[ntest * i:ntest * (i + 1)]
        print("Using models " + str(models) + " and testing sample " +
              str(testfiles))
        cv_scores = list()
        for j, testfile in enumerate(testfiles):
            model = Model()
            # NOTE(review): keyword is spelled ``stoke`` here but ``stokes``
            # in ``cv_score`` below - confirm against ``Model.add_from_txt``.
            model.add_from_txt(models[j], stoke=stokes)
            print("Using test file " + str(testfile))
            data = UVData(testfile)
            cv_score = data.cv_score(model, stokes=stokes)
            print("cv_score for one testing sample is " + str(cv_score))
            cv_scores.append(cv_score)

        mean_cv_score = np.mean(cv_scores)
        std_cv_score = np.std(cv_scores)
        print(mean_cv_score, std_cv_score)

        result.append(["model#" + str(i + 1), mean_cv_score, std_cv_score])

    return result
# Load the visibility data and the difmap model. The components of that model
# define how each MCMC sample vector is split into per-component parameters.
uv_path = os.path.join(data_dir, uv_fits)
uvdata = UVData(uv_path)
original_comps = import_difmap_model(mdl_file, data_dir)

# Fit the model with MCMC.
lnpost, sampler = fit_model_with_mcmc(
    uv_path,
    os.path.join(data_dir, mdl_file),
    samples_file='samples_of_mcmc.txt',
    outdir='/home/ilya/code/vlbi_errors/bin_c1/')
# Keep every 10th row of the flattened chain.
samples = sampler.flatchain[::10, :]

# Create a sample of models with parameters from posterior distribution:
# 100 row indices are drawn at random, so a row may be picked more than once.
picked_rows = np.random.randint(len(samples), size=100)
models = list()
for pars in samples[picked_rows]:
    model = Model(stokes='I')
    start = 0
    for orig_comp in original_comps:
        # Each component consumes the next ``orig_comp.size`` parameters.
        stop = start + orig_comp.size
        model.add_component(orig_comp.__class__(*pars[start:stop]))
        start = stop
    models.append(model)

# CV score of every sampled model, evaluated on the two listed baselines.
cv_scores = [uvdata.cv_score(sampled_model, baselines=[774, 1546])
             for sampled_model in models]

np.savetxt(os.path.join(data_dir, 'cv_scores_eg.txt'), np.array(cv_scores))

# # Now check delta
# modelfit_difmap(uv_fits, '0235+164.c1.2008_09_02_cgauss.mdl',
#                 '0235+164.c1.2008_09_02_cgauss_fitted_fitted.mdl', niter=100,
#                 path=data_dir, mdl_path=data_dir, out_path=data_dir)
# K-fold cross-validation of CLEAN models: for every number of CLEAN
# iterations in ``cc_pars`` the data are split into ``n_folds`` folds, a model
# is CLEANed on each training part and scored on the matching testing part.
n_folds = 10
for niter in cc_pars:
    # Single-argument ``print(...)`` prints the same text on Python 2 and 3.
    print("Using niter = {}".format(niter))
    kfold = KFoldCV(uv_fits, n_folds)
    trained_fname = 'trained_model_{}.FITS'.format(niter)
    cv = list()
    for j, (tr_fname, ts_fname) in enumerate(kfold):
        # NOTE(review): the training file is taken from ``kfold.train_fname``
        # rather than from the ``tr_fname`` yielded by the iterator -
        # presumably the same file; confirm against ``KFoldCV``.
        clean_n(kfold.train_fname, trained_fname, 'I', (1024, 0.1),
                niter=niter, path_to_script=path_to_script,
                show_difmap_output=True)
        tr_model = create_model_from_fits_file(trained_fname)
        ts_uvdata = UVData(ts_fname)
        score = ts_uvdata.cv_score(tr_model)
        print("{} of {} gives {}".format(j + 1, n_folds, score))
        cv.append(score)

    # NaN-aware statistics: folds whose score is NaN are ignored.
    cv_mean = np.nanmean(cv)
    cv_std = np.nanstd(cv)
    cv_scores[niter] = (cv_mean, cv_std)
    print("CV gives {} +/- {}".format(cv_mean, cv_std))

print(cv_scores)

# Materialise the keys so that ``n`` is a real sequence on Python 3 as well;
# ``scores`` and ``errors`` follow the same key order.
n = list(cv_scores.keys())
scores = [cv_scores[i][0] for i in n]
errors = [cv_scores[i][1] for i in n]

import matplotlib
label_size = 12
matplotlib.rcParams['xtick.labelsize'] = label_size
matplotlib.rcParams['ytick.labelsize'] = label_size
matplotlib.rcParams['axes.titlesize'] = label_size