Example #1
    def __init__(self, fname, k, basename='cv', seed=None):
        self.fname = fname
        self.uvdata = UVData(fname)
        self.k = k
        self.seed = seed
        self.basename = basename
        self.test_fname = "{}_test.FITS".format(basename)
        self.train_fname = "{}_train.FITS".format(basename)
        self.baseline_folds = None
        self.create_folds()
Example #2
def learning_curve(uv_fits_path, fracs, K, initial_dfm_model_path=None,
                   n_iter=100, mapsize_clean=(512, 0.1), path_to_script=None,
                   n_splits=10, data_dir=None, ls_cv='-',
                   ls_train='-', plot=False):

    uvdata = UVData(uv_fits_path)
    cv_means = dict()
    train_means = dict()
    for frac in fracs:
        cv_means[frac] = list()
        train_means[frac] = list()
        for i in range(n_splits):
            uv_frac_path = os.path.join(data_dir, 'frac_{}.fits'.format(frac))
            uvdata.save_fraction(uv_frac_path, frac,
                                 random_state=np.random.randint(0, 1000))
            kfold = KFoldCV(uv_frac_path, K, seed=np.random.randint(0, 1000))
            kfold.create_train_test_data(outdir=data_dir)
            cv_scores, train_scores = kfold.cv_score(initial_dfm_model_path=initial_dfm_model_path,
                                                     data_dir=data_dir,
                                                     niter=n_iter,
                                                     mapsize_clean=mapsize_clean,
                                                     path_to_script=path_to_script)
            cv_means[frac].append(np.mean(cv_scores))
            train_means[frac].append(np.mean(train_scores))

    # CV-score for full data
    cv_means[1.0] = list()
    train_means[1.0] = list()
    for i in range(n_splits):
        kfold = KFoldCV(uv_fits_path, K, seed=np.random.randint(0, 1000))
        kfold.create_train_test_data(outdir=data_dir)
        cv_scores, train_scores = kfold.cv_score(initial_dfm_model_path=initial_dfm_model_path,
                                                 data_dir=data_dir,
                                                 niter=n_iter,
                                                 mapsize_clean=mapsize_clean,
                                                 path_to_script=path_to_script)
        cv_means[1.0].append(np.mean(cv_scores))
        train_means[1.0].append(np.mean(train_scores))

    if plot:
        fig, axes = plt.subplots()
        axes.errorbar(sorted(cv_means.keys()),
                      y=[np.mean(cv_means[frac]) for frac in sorted(cv_means.keys())],
                      yerr=[np.std(cv_means[frac]) for frac in sorted(cv_means.keys())],
                      label='CV', ls=ls_cv)
        axes.errorbar(sorted(train_means.keys()),
                      y=[np.mean(train_means[frac]) for frac in sorted(train_means.keys())],
                      yerr=[np.std(train_means[frac]) for frac in sorted(train_means.keys())],
                      label='Train', ls=ls_train)
        axes.legend()
        axes.set_xlabel("Frac. of training data")
        axes.set_ylabel("RMSE")
        fig.show()

    return cv_means, train_means
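
A minimal sketch of how the learning_curve() function above might be called, assuming the vlbi_errors helpers it relies on; all file paths and the difmap model name below are hypothetical placeholders.

# Hypothetical usage sketch for learning_curve() above; all paths are placeholders.
import numpy as np

uv_fits = '/data/0235+164_X.uvf'
dfm_model = '/data/initial.mdl'
fracs = [0.2, 0.4, 0.6, 0.8]
cv_means, train_means = learning_curve(uv_fits, fracs, K=5,
                                       initial_dfm_model_path=dfm_model,
                                       n_iter=100,
                                       path_to_script='/path/to/final_clean_nw',
                                       n_splits=5, data_dir='/tmp/lc',
                                       plot=True)
print("CV RMSE on the full data set:", np.mean(cv_means[1.0]))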
Example #3
    def run(self, modelcard=None, testcard=None, stokes='I'):
        """
        Method that cross-validates set of image-plane models obtained by
        modelling training samples on corresponding set of testing samples.

        :param modelcard:
            Wildcard of file names ~ 'model_0i_0jofN.txt', where model in
            'model_0i_0jofN.txt' file is from modelling ``0j``-th training
            sample ('train_0jofN.FITS') with ``0i``-th model.

        :param testcard:
            Wildcard of file names ~ 'test_0jofN.FITS'.

        :return:
            List of lists [modelfilename, CV-score, sigma_cv_score].
        """

        modelfiles = glob.glob(modelcard)
        testfiles = glob.glob(testcard)
        modelfiles.sort()
        testfiles.sort()
        ntest = len(testfiles)
        nmodels = len(modelfiles) // ntest

        assert (not len(modelfiles) % float(len(testfiles)))

        print("modelfiles : " + str(modelfiles))
        print("testfiles : " + str(testfiles))

        result = list()

        for i in range(nmodels):
            print("Using models " + str(modelfiles[ntest * i:ntest *
                                                   (i + 1)]) +
                  " and testing sample " + str(testfiles))
            models = modelfiles[ntest * i:ntest * (i + 1)]
            cv_scores = list()
            for j, testfile in enumerate(testfiles):
                model = Model()
                model.add_from_txt(models[j], stoke=stokes)
                print("Using test file " + str(testfile))
                data = UVData(testfile)
                cv_score = data.cv_score(model, stokes=stokes)
                print("cv_score for one testing sample is " + str(cv_score))
                cv_scores.append(cv_score)

            mean_cv_score = np.mean(cv_scores)
            std_cv_score = np.std(cv_scores)
            print(mean_cv_score, std_cv_score)

            result.append(["model#" + str(i + 1), mean_cv_score, std_cv_score])

        return result
Example #4
class KFoldCV(object):
    def __init__(self, fname, k, basename='cv', seed=None):
        self.fname = fname
        self.uvdata = UVData(fname)
        self.k = k
        self.seed = seed
        self.basename = basename
        self.test_fname = "{}_test.FITS".format(basename)
        self.train_fname = "{}_train.FITS".format(basename)
        self.baseline_folds = None
        self.create_folds()

    def create_folds(self):
        baseline_folds = dict()
        for bl, indxs in self.uvdata._indxs_baselines.items():
            print "Baseline {} has {} samples".format(bl,
                                                      np.count_nonzero(indxs))
            try:
                kfold = KFold(np.count_nonzero(indxs),
                              self.k,
                              shuffle=True,
                              random_state=self.seed)
                baseline_folds[bl] = list()
                for train, test in kfold:
                    tr = to_boolean_array(
                        np.nonzero(indxs)[0][train], len(indxs))
                    te = to_boolean_array(
                        np.nonzero(indxs)[0][test], len(indxs))
                    baseline_folds[bl].append((tr, te))
            # When ``k`` is more than the number of baseline samples
            except ValueError:
                pass
        self.baseline_folds = baseline_folds

    def __iter__(self):
        for i in range(self.k):
            train_indxs = np.zeros(len(self.uvdata.hdu.data))
            test_indxs = np.zeros(len(self.uvdata.hdu.data))
            for bl, kfolds in self.baseline_folds.items():
                itrain, itest = kfolds[i]
                # itrain = to_boolean_array(itrain)
                train_indxs = np.logical_or(train_indxs, itrain)
                test_indxs = np.logical_or(test_indxs, itest)
            train_data = self.uvdata.hdu.data[train_indxs]
            test_data = self.uvdata.hdu.data[test_indxs]
            self.uvdata.save(self.test_fname, test_data, rewrite=True)
            self.uvdata.save(self.train_fname, train_data, rewrite=True)

            yield self.train_fname, self.test_fname
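
A short sketch of iterating over the KFoldCV class above: each pass writes a train/test UVFITS pair and yields their file names. The UVFITS path is a placeholder and the per-fold scoring step is left as a hypothetical comment; any fit-and-score routine could be plugged in there.

# Hypothetical usage of the KFoldCV class above; the path is a placeholder.
import numpy as np

kfold = KFoldCV('/data/0235+164_X.uvf', k=5, basename='cv', seed=42)
fold_scores = list()
for train_fname, test_fname in kfold:
    # Fit a model on ``train_fname`` here, then score it on ``test_fname``,
    # e.g. with a helper like ``score(test_fname, fitted_model_path)``.
    fold_scores.append(0.0)  # placeholder for the per-fold score
print("Mean CV score:", np.mean(fold_scores))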
Example #5
def create_bootstrap_sample(uvdata_dict, ccfits_dict, data_dir, n_boot=10):
    """
    Create ``n_boot`` bootstrap replications of the original UV-data using
    several Stokes CC-models for each band.

    :param uvdata_dict:
        Dictionary with bands as keys and UV-data files as values.
    :param ccfits_dict:
        Nested dictionary with bands and Stokes parameters as keys and CC FITS
        files with models for the given band and Stokes as values.

    Creates ``n_boot`` UV-data files for each band with names
    ``boot_band_i.uvf`` in ``data_dir``.
    """
    print("Bootstrap uv-data with CLEAN-models...")
    for band, uv_fits in uvdata_dict.items():
        uvdata = UVData(os.path.join(data_dir, uv_fits))
        # print("Band = {}".format(band))
        models = list()
        for stokes, cc_fits in ccfits_dict[band].items():
            # print("Stokes = {}".format(stokes))
            ccmodel = create_model_from_fits_file(
                os.path.join(data_dir, cc_fits))
            models.append(ccmodel)

        boot = CleanBootstrap(models, uvdata)
        curdir = os.getcwd()
        os.chdir(data_dir)
        boot.run(n=n_boot,
                 nonparametric=False,
                 use_v=False,
                 use_kde=True,
                 outname=['boot_{}'.format(band), '.uvf'])
        os.chdir(curdir)
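
The docstring above describes nested dictionaries keyed by band and by Stokes parameter. A sketch of what those arguments might look like; the band names and file names are hypothetical and all files are assumed to live in data_dir.

# Hypothetical input structure for create_bootstrap_sample(); file names are placeholders.
uvdata_dict = {'x': 'source_x.uvf', 'u': 'source_u.uvf'}
ccfits_dict = {'x': {'I': 'cc_i_x.fits', 'Q': 'cc_q_x.fits'},
               'u': {'I': 'cc_i_u.fits', 'Q': 'cc_q_u.fits'}}
create_bootstrap_sample(uvdata_dict, ccfits_dict,
                        data_dir='/tmp/boot', n_boot=10)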
Example #6
    def __init__(self, original_uvfits, outdir):
        self.original_uvfits = original_uvfits
        self.uvdata = UVData(original_uvfits)
        self.outdir = outdir
        self.train_uvfits = "cv_train.uvf"
        self.test_uvfits = "cv_test.uvf"
        self.cur_bl = None
        self.cur_scan = None
Example #7
    def load_uvdata(self):
        self.uvdata_dict = dict()
        self.uvfits_dict = dict()
        for fits_file in self.original_fits_files:
            print("Loading UV-FITS file {}".format(os.path.split(fits_file)[-1]))
            uvdata = UVData(fits_file)
            self.uvdata_dict.update({uvdata.band_center: uvdata})
            self.uvfits_dict.update({uvdata.band_center: fits_file})
Example #8
def bootstrap_uvfits_with_difmap_model(
        uv_fits_path,
        dfm_model_path,
        nonparametric=False,
        use_kde=False,
        use_v=False,
        n_boot=100,
        stokes='I',
        boot_dir=None,
        recenter=True,
        pairs=False,
        niter=100,
        bootstrapped_uv_fits=None,
        additional_noise=None,
        boot_mdl_outname_base="bootstrapped_model"):
    dfm_model_dir, dfm_model_fname = os.path.split(dfm_model_path)
    comps = import_difmap_model(dfm_model_fname, dfm_model_dir)
    if boot_dir is None:
        boot_dir = os.getcwd()
    if bootstrapped_uv_fits is None:
        uvdata = UVData(uv_fits_path)
        model = Model(stokes=stokes)
        model.add_components(*comps)
        boot = CleanBootstrap([model],
                              uvdata,
                              additional_noise=additional_noise)
        os.chdir(boot_dir)
        boot.run(nonparametric=nonparametric,
                 use_kde=use_kde,
                 recenter=recenter,
                 use_v=use_v,
                 n=n_boot,
                 pairs=pairs)
        bootstrapped_uv_fits = sorted(
            glob.glob(os.path.join(boot_dir, 'bootstrapped_data*.fits')))
    for j, bootstrapped_fits in enumerate(bootstrapped_uv_fits):
        modelfit_difmap(bootstrapped_fits,
                        dfm_model_fname,
                        '{}_{}.mdl'.format(boot_mdl_outname_base, j),
                        path=boot_dir,
                        mdl_path=dfm_model_dir,
                        out_path=boot_dir,
                        niter=niter)
    booted_mdl_paths = glob.glob(
        os.path.join(boot_dir, '{}*'.format(boot_mdl_outname_base)))

    # Clean uv_fits
    for file_ in bootstrapped_uv_fits:
        os.unlink(file_)
    logs = glob.glob(os.path.join(boot_dir, "*.log*"))
    for file_ in logs:
        os.unlink(file_)
    comms = glob.glob(os.path.join(boot_dir, "*commands*"))
    for file_ in comms:
        os.unlink(file_)

    return booted_mdl_paths
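
A hedged usage sketch for bootstrap_uvfits_with_difmap_model() above; the UVFITS and difmap model paths are placeholders. The returned list contains the bootstrapped difmap model files written to boot_dir.

# Hypothetical call to bootstrap_uvfits_with_difmap_model(); paths are placeholders.
booted_mdl_paths = bootstrap_uvfits_with_difmap_model(
    '/data/0851+202.u.uvf',
    '/data/0851+202.u.mdl',
    nonparametric=False,
    n_boot=100,
    boot_dir='/tmp/boot',
    niter=100)
print("Got {} bootstrapped models".format(len(booted_mdl_paths)))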
Example #9
def score(uv_fits_path, mdl_path, stokes='I'):
    """
    Returns rms of the model on the given uv-data for the chosen Stokes
    parameter.
    
    :param uv_fits_path: 
        Path to uv-fits file.
    :param mdl_path: 
        Path to difmap model text file or FITS-file with CLEAN model.
    :param stokes: (optional)
        Stokes parameter string. ``I``, ``RR`` or ``LL`` currently supported.
        (default: ``I``)
    :return: 
        Per-point rms between given data and model evaluated at given data
        points.
    """
    if stokes not in ('I', 'RR', 'LL'):
        raise Exception("Only stokes (I, RR, LL) supported!")
    uvdata = UVData(uv_fits_path)
    uvdata_model = UVData(uv_fits_path)
    try:
        model = create_model_from_fits_file(mdl_path)
    except IOError:
        dfm_mdl_dir, dfm_mdl_fname = os.path.split(mdl_path)
        comps = import_difmap_model(dfm_mdl_fname, dfm_mdl_dir)
        model = Model(stokes=stokes)
        model.add_components(*comps)
    uvdata_model.substitute([model])
    uvdata_diff = uvdata - uvdata_model
    if stokes == 'I':
        i_diff = 0.5 * (uvdata_diff.uvdata_weight_masked[..., 0] +
                        uvdata_diff.uvdata_weight_masked[..., 1])
    elif stokes == 'RR':
        i_diff = uvdata_diff.uvdata_weight_masked[..., 0]
    elif stokes == 'LL':
        i_diff = uvdata_diff.uvdata_weight_masked[..., 1]
    else:
        raise Exception("Only stokes (I, RR, LL) supported!")
    # 2 means that Re & Im are counted independently
    factor = 2 * np.count_nonzero(i_diff)
    # factor = np.count_nonzero(~uvdata_diff.uvdata_weight_masked.mask[:, :, :2])
    # squared_diff = uvdata_diff.uvdata_weight_masked[:, :, :2] * \
    #                uvdata_diff.uvdata_weight_masked[:, :, :2].conj()
    squared_diff = i_diff * i_diff.conj()
    return np.sqrt(float(np.sum(squared_diff)) / factor)
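
A sketch of calling score() above with either a CLEAN FITS model or a difmap text model; both paths below are hypothetical placeholders.

# Hypothetical calls to score(); paths are placeholders.
rms_cc = score('/data/test.uvf', '/data/cc.fits', stokes='I')       # CLEAN model
rms_dfm = score('/data/test.uvf', '/data/model.mdl', stokes='RR')   # difmap model
print(rms_cc, rms_dfm)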
Example #10
    def __init__(self,
                 uv_fits_path,
                 k,
                 basename='cv',
                 seed=None,
                 baselines=None,
                 stokes='I'):
        if stokes not in ('I', 'RR', 'LL'):
            raise Exception("Only stokes (I, RR, LL) supported!")
        self.stokes = stokes
        self.uv_fits_path = uv_fits_path
        self.uvdata = UVData(uv_fits_path)
        self.k = k
        self.seed = seed
        self.basename = basename
        self.test_fname_base = "{}_test".format(basename)
        self.train_fname_base = "{}_train".format(basename)
        self.baseline_folds = None
        self.create_folds(baselines)
Example #11
def bootstrap_uv_fits(uv_fits_path,
                      cc_fits_paths,
                      n,
                      outpath=None,
                      outname=None):
    """
    Function that bootstraps uv-data from a user-specified FITS-file using
    FITS-files with CLEAN components (CC models).

    :param uv_fits_path:
        Path to fits file with self-calibrated uv-data.
    :param cc_fits_paths:
        Iterable of paths to files with CC models.
    :param n:
        Number of bootstrap realizations.
    :param outpath: (optional)
        Directory to save bootstrapped uv-data FITS-files. If ``None``
        then use CWD. (default: ``None``)
    :param outname: (optional)
        How to name bootstrapped uv-data FITS-files. If ``None`` then
        use the default of the ``Bootstrap.run`` method. (default: ``None``)

    """

    uvdata = UVData(uv_fits_path)

    models = list()
    for cc_fits_path in cc_fits_paths:
        ccmodel = create_model_from_fits_file(cc_fits_path)
        models.append(ccmodel)

    boot = CleanBootstrap(models, uvdata)
    if outpath is None:
        outpath = os.getcwd()
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    curdir = os.getcwd()
    os.chdir(outpath)
    boot.run(n=n,
             outname=outname,
             nonparametric=False,
             use_v=False,
             use_kde=True)
    os.chdir(curdir)
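
A sketch of calling bootstrap_uv_fits() above with a multi-Stokes set of CLEAN models; the file names are hypothetical, and the [base, extension] form of outname follows the pattern used elsewhere in these examples.

# Hypothetical call to bootstrap_uv_fits(); paths are placeholders.
bootstrap_uv_fits('/data/source.uvf',
                  ['/data/cc_i.fits', '/data/cc_q.fits', '/data/cc_u.fits'],
                  n=100,
                  outpath='/tmp/boot',
                  outname=['boot', '.uvf'])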
Example #12
                                  use_V=use_V, use_weights=use_weights)
        self.lnpr = LnPrior(model)

    def __call__(self, p):
        lnpr = self.lnpr(p[:])
        if not np.isfinite(lnpr):
            return -np.inf
        return self.lnlik(p[:]) + lnpr


if __name__ == '__main__':
    import numpy as np
    import scipy as sp
    from spydiff import import_difmap_model
    from uv_data import UVData
    from model import Model, Jitter
    uv_fits = '/home/ilya/code/vlbi_errors/pet/0235+164_X.uvf_difmap'
    uvdata = UVData(uv_fits)
    # Create model
    mdl = Model(stokes='RR')
    comps = import_difmap_model('0235+164_X.mdl',
                                '/home/ilya/code/vlbi_errors/pet')
    comps[0].add_prior(flux=(sp.stats.uniform.logpdf, [0., 10], dict(),),
                       bmaj=(sp.stats.uniform.logpdf, [0, 1], dict(),),
                       e=(sp.stats.uniform.logpdf, [0, 1.], dict(),),
                       bpa=(sp.stats.uniform.logpdf, [0, np.pi], dict(),))
    comps[1].add_prior(flux=(sp.stats.uniform.logpdf, [0., 3], dict(),),
                       bmaj=(sp.stats.uniform.logpdf, [0, 5], dict(),))
    mdl.add_components(*comps)

    # Create log of likelihood function
    lnlik = LnLikelihood(uvdata, mdl)
    lnpr = LnPrior(mdl)
Example #13
import os
import sys
sys.path.insert(0, '/home/ilya/github/vlbi_errors/vlbi_errors')
import numpy as np
from skimage.transform import rotate
import matplotlib.pyplot as plt
from uv_data import UVData
from components import ImageComponent
from model import Model


mas_to_rad = 4.8481368 * 1E-09
# uv_file = '/home/ilya/github/bck/jetshow/uvf/0716+714_raks01xg_C_LL_0060s_uva.fits'
uv_file = '/home/ilya/github/bck/jetshow/uvf/2200+420_K_SVLBI.uvf'
uvdata = UVData(uv_file)

# fig = uvdata.uvplot(stokes=["LL"])
fig = uvdata.uvplot()

images = list()
angles = range(0, 180, 30)
# image = '/home/ilya/github/bck/jetshow/uvf/map_i_09_C.txt'
image = '/home/ilya/github/bck/jetshow/cmake-build-debug/map_i.txt'
image = np.loadtxt(image)
images.append(image)

# imsize = 1096
imsize = 1734
imsize = (imsize, imsize)
# mas_in_pix = 0.005
mas_in_pix = 0.00253
Example #14
            return -np.inf
        return self.lnlik(p[:]) + lnpr


if __name__ == '__main__':
    # Test LS_estimates
    import sys
    from components import CGComponent, EGComponent
    from uv_data import UVData
    from model import Model
    try:
        from scipy.optimize import minimize, fmin
    except ImportError:
        sys.exit("install scipy for ml estimation")
    uv_fname = '/home/ilya/vlbi_errors/examples/L/1633+382/1633+382.l18.2010_05_21.uvf'
    uvdata = UVData(uv_fname)
    # Create model
    cg1 = EGComponent(1.0, -0.8, 0.2, .7, 0.5, 0)
    cg2 = CGComponent(0.8, 2.0, -.3, 2.3)
    cg3 = CGComponent(0.2, 5.0, .0, 2.)
    mdl = Model(stokes='I')
    mdl.add_components(cg1, cg2, cg3)
    # Create log of likelihood function
    lnlik = LnLikelihood(uvdata, mdl, average_freq=True, amp_only=False)
    # Nelder-Mead simplex algorithm
    p_ml = fmin(lambda p: -lnlik(p), mdl.p)
    # Various methods of minimization (some require jacobians)
    # TODO: Implement analytical gradient of likelihood (it's gaussian)
    fit = minimize(lambda p: -lnlik(p),
                   mdl.p,
                   method='L-BFGS-B',
Example #15
def coverage_of_model(original_uv_fits,
                      original_mdl_file,
                      outdir=None,
                      n_cov=100,
                      n_boot=300,
                      mapsize=(1024, 0.1),
                      path_to_script=None):
    """
    Conduct coverage analysis of uv-data & model

    :param original_uv_fits:
        Self-calibrated uv-fits file.
    :param original_mdl_file:
        Difmap txt-file with model.
    :param outdir:
        Output directory to store results.
    :param n_cov:
        Number of samples to create.
    """
    # Create sample of 100 uv-fits data & models
    sample_uv_fits_paths, sample_model_paths = create_sample(original_uv_fits,
                                                             original_mdl_file,
                                                             outdir=outdir,
                                                             n_sample=n_cov)

    # For each sample uv-fits & model find 1) conventional errors & 2) bootstrap
    # errors
    for j, (sample_uv_fits_path, sample_mdl_path) in enumerate(
            zip(sample_uv_fits_paths, sample_model_paths)):
        dir, sample_uv_fits = os.path.split(sample_uv_fits_path)
        dir, sample_mdl_file = os.path.split(sample_mdl_path)
        try:
            comps = import_difmap_model(sample_mdl_file, dir)
        except ValueError:
            print('Problem importing difmap model')
            continue
        model = Model(stokes='I')
        model.add_components(*comps)

        # Find errors by using Fomalont way
        # 1. Clean uv-data
        clean_difmap(sample_uv_fits,
                     'sample_cc_{}.fits'.format(j),
                     'I',
                     mapsize,
                     path=dir,
                     path_to_script=path_to_script,
                     outpath=dir)
        # 2. Get beam
        ccimage = create_clean_image_from_fits_file(
            os.path.join(dir, 'sample_cc_{}.fits'.format(j)))
        beam = ccimage.beam_image

        # 2. Subtract components convolved with beam
        ccimage.substract_model(model)

        # Find errors by using Lee way
        # a) fit uv-data and find model
        # b) CLEAN uv-data
        # c) substract model from CLEAN image
        # d) find errors
        pass

        # Find errors by using bootstrap
        # FT model to uv-plane
        uvdata = UVData(sample_uv_fits_path)
        try:
            boot = CleanBootstrap([model], uvdata)
        # If uv-data contains only one Stokes parameter (e.g. `0838+133`)
        except IndexError:
            print('Problem bootstrapping')
            continue
        curdir = os.getcwd()
        os.chdir(dir)
        boot.run(n=n_boot, nonparametric=True, outname=[outname, '.fits'])
        os.chdir(curdir)

        booted_uv_paths = sorted(
            glob.glob(os.path.join(data_dir, outname + "*")))
        # Modelfit bootstrapped uvdata
        for booted_uv_path in booted_uv_paths:
            path, booted_uv_file = os.path.split(booted_uv_path)
            i = booted_uv_file.split('_')[-1].split('.')[0]
            modelfit_difmap(booted_uv_file,
                            dfm_model_fname,
                            dfm_model_fname + '_' + i,
                            path=path,
                            mdl_path=data_dir,
                            out_path=data_dir)

        # Get params of initial model used for bootstrap
        comps = import_difmap_model(dfm_model_fname, data_dir)
        comps_params0 = {i: [] for i in range(len(comps))}
        for i, comp in enumerate(comps):
            comps_params0[i].extend(list(comp.p))

        # Load bootstrap models
        booted_mdl_paths = glob.glob(
            os.path.join(data_dir, dfm_model_fname + "_*"))
        comps_params = {i: [] for i in range(len(comps))}
        for booted_mdl_path in booted_mdl_paths:
            path, booted_mdl_file = os.path.split(booted_mdl_path)
            comps = import_difmap_model(booted_mdl_file, path)
            for i, comp in enumerate(comps):
                comps_params[i].extend(list(comp.p))

        # Print 65-% intervals (1 sigma)
        for i, comp in enumerate(comps):
            errors_fname = '68_{}_{}_comp{}.txt'.format(source, last_epoch, i)
            fn = open(os.path.join(data_dir, errors_fname), 'w')
            print "Component #{}".format(i + 1)
            for j in range(len(comp)):
                low, high, mean, median = hdi_of_mcmc(np.array(
                    comps_params[i]).reshape((n_boot, len(comp))).T[j],
                                                      cred_mass=0.68,
                                                      return_mean_median=True)
                fn.write("{} {} {} {} {}".format(comp.p[j], low, high, mean,
                                                 median))
                fn.write("\n")
            fn.close()

    # For source in sources with component close to core
    # 1. Find residuals or estimate noise
    # 2. N times add resampled residuals (or just gaussian noise) to model and
    # create N new datasets
    # 3. Fit them using difmap.
    # 4. Find errors using Fomalont, Yee and using bootstrap. Check coverage.
    base_dir = '/home/ilya/vlbi_errors/model_cov'
    n_boot = 300
    outname = 'boot_uv'
    names = [
        'source', 'id', 'trash', 'epoch', 'flux', 'r', 'pa', 'bmaj', 'e', 'bpa'
    ]
    df = pd.read_table(os.path.join(base_dir, 'asu.tsv'),
                       sep=';',
                       header=None,
                       names=names,
                       dtype={key: str
                              for key in names},
                       index_col=False)

    # Now for all sources get the latest epoch and create directory for analysis
    for source in df['source'].unique():
        epochs = df.loc[df['source'] == source]['epoch']
        last_epoch_ = list(epochs)[-1]
        last_epoch = last_epoch_.replace('-', '_')
        data_dir = os.path.join(base_dir, source, last_epoch)
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)
        try:
            download_mojave_uv_fits(source,
                                    epochs=[last_epoch],
                                    bands=['u'],
                                    download_dir=data_dir)
        except:
            open(
                'problem_download_from_mojave_{}_{}'.format(
                    source, last_epoch), 'a').close()
            continue
        uv_fits_fname = mojave_uv_fits_fname(source, 'u', last_epoch)

        # Create instance of Model and bootstrap uv-data
        dfm_model_fname = 'dfmp_original_model.mdl'
        fn = open(os.path.join(data_dir, dfm_model_fname), 'w')
        model_df = df.loc[np.logical_and(df['source'] == source,
                                         df['epoch'] == last_epoch_)]
        for (flux, r, pa, bmaj, e, bpa) in np.asarray(
                model_df[['flux', 'r', 'pa', 'bmaj', 'e', 'bpa']]):
            print(flux, r, pa, bmaj, e, bpa)
            if not r.strip(' '):
                r = '0.0'
            if not pa.strip(' '):
                pa = '0.0'

            if not bmaj.strip(' '):
                bmaj = '0.0'
            if not e.strip(' '):
                e = "1.0"

            if np.isnan(float(bpa)):
                bpa = "0.0"
            else:
                bpa = bpa + 'v'

            if bmaj == '0.0':
                type_ = 0
                bpa = "0.0"
            else:
                bmaj = bmaj + 'v'
                type_ = 1
            fn.write("{}v {}v {}v {} {} {} {} {} {}".format(
                flux, r, pa, bmaj, e, bpa, type_, "0", "0\n"))
        fn.close()
Example #16
import os
from uv_data import UVData
from model import Model
from spydiff import import_difmap_model
from bootstrap import CleanBootstrap

data_dir = '/home/ilya/code/vlbi_errors/tests/ft'
uv_fits = '1308+326.U1.2009_08_28.UV_CAL'
uvdata = UVData(os.path.join(data_dir, uv_fits))
model = Model(stokes='I')
comps = import_difmap_model('1308+326.U1.2009_08_28.mdl', data_dir)
model.add_components(*comps)
boot = CleanBootstrap([model], uvdata)
fig = boot.data.uvplot()
boot.model_data.uvplot(fig=fig, color='r')
# boot.find_outliers_in_residuals()
# boot.find_residuals_centers(split_scans=False)
# boot.fit_residuals_kde(split_scans=False, combine_scans=False,
#                        recenter=True)
Example #17
def score(uv_fits_path,
          mdl_path,
          stokes='I',
          bmaj=None,
          score="l2",
          use_weights=True):
    """
    Returns rms of the trained model (CLEAN or difmap) on a given test UVFITS
    data set.

    :param uv_fits_path:
        Path to uv-fits file (test data).
    :param mdl_path:
        Path to difmap model text file or FITS-file with CLEAN model (trained
        model).
    :param stokes: (optional)
        Stokes parameter string. ``I``, ``RR`` or ``LL`` currently supported.
        (default: ``I``)
    :param bmaj: (optional)
        FWHM of the circular beam to account for. If ``None`` then do not
        account for the beam. (default: ``None``)
    :return:
        Per-point rms between the given test data and the trained model
        evaluated at the given test data points.
    """
    stokes = stokes.upper()
    if stokes not in ('I', 'RR', 'LL'):
        raise Exception("Only stokes I, RR or LL are supported!")

    if bmaj is not None:
        c = (np.pi * bmaj * mas_to_rad)**2 / (4 * np.log(2))
    else:
        c = 1.0

    # Loading test data with its own big mask
    uvdata = UVData(uv_fits_path)
    uvdata_model = UVData(uv_fits_path)

    # Loading trained model
    # CC-model
    try:
        model = create_model_from_fits_file(mdl_path)
    # Difmap model
    except IOError:
        dfm_mdl_dir, dfm_mdl_fname = os.path.split(mdl_path)
        comps = import_difmap_model(dfm_mdl_fname, dfm_mdl_dir)
        model = Model(stokes=stokes)
        model.add_components(*comps)

    # Computing difference and score
    uvdata_model.substitute([model])
    uvdata_diff = uvdata - uvdata_model
    if stokes == 'I':
        i_diff = 0.5 * (uvdata_diff.uvdata_weight_masked[..., 0] +
                        uvdata_diff.uvdata_weight_masked[..., 1])
        weights = (uvdata.weights_nw_masked[..., 0] +
                   uvdata.weights_nw_masked[..., 1])
    elif stokes == 'RR':
        i_diff = uvdata_diff.uvdata_weight_masked[..., 0]
        weights = uvdata.weights_nw_masked[..., 0]
    elif stokes == 'LL':
        i_diff = uvdata_diff.uvdata_weight_masked[..., 1]
        weights = uvdata.weights_nw_masked[..., 1]
    else:
        raise Exception("Only stokes (I, RR, LL) supported!")

    # Normalize weights
    weights = weights / np.ma.sum(weights)

    # Account for beam
    if bmaj is not None:
        u = uvdata_diff.uv[:, 0]
        v = uvdata_diff.uv[:, 1]
        taper = np.exp(-c * (u * u + v * v))
        i_diff = i_diff * taper[:, np.newaxis]

    # Number of unmasked visibilities (accounting each IF)
    if stokes == "I":
        # 2 means that Re & Im are counted independently
        factor = 2 * np.count_nonzero(~i_diff.mask)
    else:
        factor = np.count_nonzero(~i_diff.mask)

    print("Number of independent test data points = ", factor)
    if score == "l2":
        if use_weights:
            result = np.sqrt(
                (np.ma.sum(i_diff * i_diff.conj() * weights)).real)
        else:
            result = np.sqrt((np.ma.sum(i_diff * i_diff.conj())).real / factor)
    elif score == "l1":
        if use_weights:
            result = (np.ma.sum(np.abs(i_diff) * weights)).real
        else:
            result = (np.ma.sum(np.abs(i_diff))).real / factor
    else:
        raise Exception("score must be in (l1, l2)!")
    return result
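
A sketch comparing the weighted and unweighted scores returned by the score() variant above, optionally tapering with a circular beam; the paths and the beam FWHM value are hypothetical placeholders.

# Hypothetical calls to score(); paths and the bmaj value are placeholders.
l2_w = score('/data/cv_test.FITS', '/data/cv_train.mdl',
             stokes='I', score="l2", use_weights=True)
l1_beam = score('/data/cv_test.FITS', '/data/cv_train_cc.fits',
                stokes='I', bmaj=0.5, score="l1", use_weights=False)
print(l2_w, l1_beam)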
Example #18
        epoch = source_dict[source][0][5]
        source_dir = os.path.join(base_dir, source, epoch)
        if not os.path.exists(source_dir):
            os.makedirs(source_dir)

        get_mojave_mdl_file(os.path.join(base_dir, 'asu.tsv'),
                            source,
                            epoch,
                            outdir=source_dir)
        epoch_ = "{}_{}_{}".format(*epoch.split('-'))
        download_mojave_uv_fits(source,
                                epochs=[epoch_],
                                download_dir=source_dir,
                                bands=['u'])
        fname = mojave_uv_fits_fname(source, 'u', epoch_)
        uvdata = UVData(os.path.join(source_dir, fname))
        print(uvdata.stokes)
        if 'RR' not in uvdata.stokes or 'LL' not in uvdata.stokes:
            continue

        # Refit difmap model
        modelfit_difmap(fname,
                        "{}_{}.mdl".format(source, epoch),
                        "{}_{}.mdl".format(source, epoch),
                        niter=200,
                        path=source_dir,
                        mdl_path=source_dir,
                        out_path=source_dir,
                        show_difmap_output=True)

        # Create sample of 100 artificial data sets
Example #19
    # epoch_ = '2007-07-03'
    source = '0336-019'
    epoch = '2010_10_25'
    epoch_ = '2010-10-25'
    data_dir = '/home/ilya/github/vlbi_errors/examples/LC'
    data_dir = os.path.join(data_dir, source, epoch)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    # download_mojave_uv_fits(source, [epoch], download_dir=data_dir)
    path_to_script = '/home/ilya/github/vlbi_errors/difmap/final_clean_nw'

    uv_fits_fname = mojave_uv_fits_fname(source, 'u', epoch)
    uv_fits_path = os.path.join(data_dir, uv_fits_fname)
    # get_mojave_mdl_file('/home/ilya/Dropbox/papers/boot/new_pics/mojave_mod_first/asu.tsv',
    #                     source, epoch_, outfile='initial.mdl', outdir=data_dir)
    uvdata = UVData(uv_fits_path)
    # modelfit_difmap(uv_fits_fname, 'initial.mdl',
    #                 'initial.mdl', niter=300,
    #                 path=data_dir, mdl_path=data_dir,
    #                 out_path=data_dir)
    original_model_path = os.path.join(data_dir, 'initial.mdl')
    from spydiff import import_difmap_model, clean_difmap
    comps = import_difmap_model(original_model_path)
    from automodel import plot_clean_image_and_components
    path_to_script = '/home/ilya/github/vlbi_errors/difmap/final_clean_nw'

    # clean_difmap(uv_fits_path, os.path.join(data_dir, 'cc.fits'), 'I',
    #              (1024, 0.1), path=data_dir, path_to_script=path_to_script,
    #              outpath=data_dir)
    from from_fits import create_clean_image_from_fits_file
    ccimage = create_clean_image_from_fits_file(
Example #20
data_dir = '/home/ilya/Dropbox/papers/boot/bias/new/stationary'
# download_mojave_uv_fits(source, epochs=[epoch], bands=['u'],
#                         download_dir=data_dir)

uv_fits_fnames = {
    freq: mojave_uv_fits_fname(source, freq, epoch)
    for freq in ('x', 'j', 'u')
}
for freq, uv_fits_fname in uv_fits_fnames.items():
    uv_fits_path = os.path.join(data_dir, uv_fits_fname)
    cg1 = CGComponent(2.0, 0., 0., 0.2)
    cg2 = CGComponent(1.0, 0., 0.3, 0.3)
    cg3 = CGComponent(0.5, 0., 1.5, 0.4)
    mdl = Model(stokes='I')
    mdl.add_components(cg1, cg2, cg3)
    uvdata = UVData(uv_fits_path)
    noise = uvdata.noise()
    for i in range(1, 101):
        uvdata = UVData(uv_fits_path)
        uvdata.substitute([mdl])
        uvdata.noise_add(noise)
        art_fits_fname = 'art_{}_{}.fits'.format(freq, i)
        art_fits_path = os.path.join(data_dir, art_fits_fname)
        uvdata.save(art_fits_path)

        # Here we should MCMC posterior
        modelfit_difmap(art_fits_fname,
                        'initial.mdl',
                        'out_{}_{}.mdl'.format(freq, i),
                        niter=100,
                        path=data_dir,
Example #21
def create_coverage_map(original_uv_fits_path,
                        ci_type,
                        original_cc_fits_path=None,
                        imsize=None,
                        outdir=None,
                        n_boot=200,
                        path_to_script=None,
                        alpha=0.68,
                        n_cov=100,
                        n_rms=1.,
                        stokes='I',
                        boot_cc_fits_paths=None,
                        sample_cc_fits_paths=None):
    """
    Conduct coverage analysis of the flux CI of image pixels. Count the number
    of times the CI derived from the `observed` value contains the values of
    the `samples`.

    :param original_uv_fits_path:
        Path to original FITS-file with uv-data.
    :param ci_type:
        Type of CI to test. ``boot`` or ``rms``. If ``boot`` then use residuals
        bootstrap CI. If ``rms`` then use Hovatta corrected image rms CI.
    :param original_cc_fits_path: (optional)
        Path to original FITS-file with CC model. If ``None`` then use
        ``imsize`` parameter to get `original` CC model from
        ``original_uv_fits_path``. (default: ``None``)
    :param imsize: (optional)
        Image parameters (image size [pix], pixel size [mas]) to use
        when doing first CC with ``original_cc_fits_path = None``. (default:
        ``None``)
    :param outdir: (optional)
        Directory to store intermediate results. If ``None`` then use CWD.
        (default: ``None``)
    :param n_boot: (optional)
        Number of bootstrap replications to use when calculating the bootstrap
        CI for the ``ci_type = boot`` option when ``boot_cc_fits_paths`` hasn't
        been specified. (default: ``200``)
    :param path_to_script: (optional)
        Path to Dan Homan's script for final clean. If ``None`` then use CWD.
        (default: ``None``)
    :param alpha: (optional)
        Level of significance when calculating bootstrap CI for ``ci_type =
        boot`` case. E.g. ``0.68`` corresponds to `1 \sigma`. (default:
        ``0.68``)
    :param n_cov: (optional)
        Number of `samples` from infinite population to consider in coverage
        analysis of intervals. Here `samples` - observations of known source
        with different realisations of noise with known parameters. (default:
         ``100``)
    :param n_rms: (optional)
        Number of rms to use in ``ci_type = rms`` case. (default: ``1.``)
    :param stokes: (optional)
        Stokes parameter to use. (default: ``I``)
    :param boot_cc_fits_paths: (optional)
        If ``ci_type = boot`` then this parameter could specify paths to cleaned
        bootstrapped uv-data.
    :param sample_cc_fits_paths: (optional)
        Path to FITS-files with CLEAN models of `sample` uv-data. If ``None``
        then create ``n_cov`` `sample` uv-data from noise of `original` uv-data
        and `original` CLEAN model. (default: ``None``)

    :return:
        Coverage map. Each pixel contains the frequency with which samples from
        the population fall inside the CI for that pixel.

    """

    # If not given `original` CLEAN model - get it by cleaning `original`
    # uv-data
    if original_cc_fits_path is None:
        print(
            "No `original` CLEAN model specified! Will CLEAN `original`"
            " uv-data.")
        if imsize is None:
            raise Exception("Specify ``imsize``")
        uv_fits_dir, uv_fits_fname = os.path.split(original_uv_fits_path)
        print("Cleaning `original` uv-data to"
              " {}".format(os.path.join(outdir, 'cc.fits')))
        clean_difmap(uv_fits_fname,
                     'cc.fits',
                     stokes,
                     imsize,
                     path=uv_fits_dir,
                     path_to_script=path_to_script,
                     outpath=outdir)
        original_cc_fits_path = os.path.join(outdir, 'cc.fits')

    original_uv_data = UVData(original_uv_fits_path)
    noise = original_uv_data.noise()
    original_model = create_model_from_fits_file(original_cc_fits_path)
    # Find images parameters for cleaning if necessary
    if imsize is None:
        print(
            "Getting image parameters from `original`"
            " CLEAN FITS file {}.".format(original_cc_fits_path))
        image_params = get_fits_image_info(original_cc_fits_path)
        imsize = (image_params['imsize'][0],
                  abs(image_params['pixsize'][0]) / mas_to_rad)

    # Substitute uv-data with original model and create `model` uv-data
    print("Substituting original uv-data with CLEAN model...")
    model_uv_data = copy.deepcopy(original_uv_data)
    model_uv_data.substitute([original_model])

    # Add noise to `model` uv-data to get `observed` uv-data
    observed_uv_data = copy.deepcopy(model_uv_data)
    observed_uv_data.noise_add(noise)
    observed_uv_fits_path = os.path.join(outdir, 'observed_uv.uvf')
    if os.path.isfile(observed_uv_fits_path):
        os.unlink(observed_uv_fits_path)
    print("Adding noise to `model` uv-data to get `observed` uv-data...")
    observed_uv_data.save(fname=observed_uv_fits_path)

    observed_cc_fits_path = os.path.join(outdir, 'observed_cc.fits')
    if os.path.isfile(observed_cc_fits_path):
        os.unlink(observed_cc_fits_path)
    # Clean `observed` uv-data to get `observed` image and model
    print("Cleaning `observed` uv-data to `observed` CLEAN model...")
    clean_difmap('observed_uv.uvf',
                 'observed_cc.fits',
                 original_model.stokes,
                 imsize,
                 path=outdir,
                 path_to_script=path_to_script,
                 outpath=outdir)
    # Get `observed` model and image
    observed_model = create_model_from_fits_file(observed_cc_fits_path)
    observed_image = create_image_from_fits_file(observed_cc_fits_path)

    # Testing coverage of bootstrapped CI
    if ci_type == 'boot':
        # Bootstrap and clean only when necessary
        if boot_cc_fits_paths is None:
            # Bootstrap `observed` uv-data with `observed` model
            boot = CleanBootstrap([observed_model], observed_uv_data)
            cwd = os.getcwd()
            path_to_script = path_to_script or cwd
            os.chdir(outdir)
            print("Bootstrapping uv-data with {} replications".format(n_boot))
            boot.run(outname=['observed_uv_boot', '.uvf'], n=n_boot)
            os.chdir(cwd)

            boot_uv_fits_paths = sorted(
                glob.glob(os.path.join(outdir, 'observed_uv_boot*.uvf')))
            # Clean each bootstrapped uv-data
            for i, uv_fits_path in enumerate(boot_uv_fits_paths):
                uv_fits_dir, uv_fits_fname = os.path.split(uv_fits_path)
                print("Cleaning {} bootstrapped observed"
                      " uv-data to {}".format(
                          uv_fits_path,
                          os.path.join(
                              outdir,
                              'observed_cc_boot_{}.fits'.format(i + 1))))
                clean_difmap(uv_fits_fname,
                             'observed_cc_boot_{}.fits'.format(i + 1),
                             original_model.stokes,
                             imsize,
                             path=uv_fits_dir,
                             path_to_script=path_to_script,
                             outpath=outdir)

            boot_cc_fits_paths = glob.glob(
                os.path.join(outdir, 'observed_cc_*.fits'))

        # Calculate bootstrap CI
        # hdi_low, hdi_high = boot_ci_bc(boot_cc_fits_paths,
        #                                observed_cc_fits_path, alpha=alpha)
        hdi_low, hdi_high = boot_ci(boot_cc_fits_paths,
                                    observed_cc_fits_path,
                                    alpha=alpha)
    elif ci_type == 'rms':
        # Calculate ``n_rms`` CI
        rms = observed_image.rms(region=(50, 50, 50, None))
        rms = np.sqrt(rms**2. + (1.5 * rms**2.)**2.)
        hdi_low = observed_image.image - rms
        hdi_high = observed_image.image + rms
    else:
        raise Exception("CI intervals must be `boot` or `rms`!")

    # Create `sample` uv-data and clean it only when necessary
    if sample_cc_fits_paths is None:
        # Add noise to `model` uv-data ``n_cov`` times and get ``n_cov``
        # `samples` from population
        sample_uv_fits_paths = list()
        for i in range(n_cov):
            sample_uv_data = copy.deepcopy(model_uv_data)
            sample_uv_data.noise_add(noise)
            sample_uv_fits_path = os.path.join(
                outdir, 'sample_uv_{}.uvf'.format(i + 1))
            sample_uv_data.save(sample_uv_fits_path)
            sample_uv_fits_paths.append(sample_uv_fits_path)

        # Clean each `sample` FITS-file
        for i, uv_fits_path in enumerate(sample_uv_fits_paths):
            uv_fits_dir, uv_fits_fname = os.path.split(uv_fits_path)
            print("Cleaning {} sample uv-data to"
                  " {}".format(
                      uv_fits_path,
                      os.path.join(outdir, 'sample_cc_{}.fits'.format(i + 1))))
            clean_difmap(uv_fits_fname,
                         'sample_cc_{}.fits'.format(i + 1),
                         original_model.stokes,
                         imsize,
                         path=uv_fits_dir,
                         path_to_script=path_to_script,
                         outpath=outdir)

        sample_cc_fits_paths = glob.glob(
            os.path.join(outdir, 'sample_cc_*.fits'))

    sample_images = list()
    for sample_cc_fits_path in sample_cc_fits_paths:
        image = create_image_from_fits_file(sample_cc_fits_path)
        sample_images.append(image.image)

    # For each pixel check how often flux in `sample` images lies in CI derived
    # for observed image.
    cov_array = np.zeros((imsize[0], imsize[0]), dtype=float)
    print("calculating CI intervals")
    for (x, y), value in np.ndenumerate(cov_array):
        for image in sample_images:
            cov_array[x, y] += float(
                np.logical_and(hdi_low[x, y] < image[x, y],
                               image[x, y] < hdi_high[x, y]))

    return cov_array / n_cov
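
A sketch of calling create_coverage_map() above for both CI types; the UVFITS path, image parameters and CLEAN-script path are hypothetical placeholders.

# Hypothetical calls to create_coverage_map(); paths and parameters are placeholders.
cov_boot = create_coverage_map('/data/source.uvf', ci_type='boot',
                               imsize=(512, 0.1), outdir='/tmp/cov',
                               n_boot=200, n_cov=100,
                               path_to_script='/path/to/final_clean_nw')
cov_rms = create_coverage_map('/data/source.uvf', ci_type='rms',
                              imsize=(512, 0.1), outdir='/tmp/cov',
                              n_cov=100, n_rms=1.,
                              path_to_script='/path/to/final_clean_nw')
print(cov_boot.mean(), cov_rms.mean())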
Example #22
clean_difmap(uv_fits_x, 'x_cc.fits', 'I', (1024, 0.1), path=uvdata_dir,
             path_to_script=path_to_script, show_difmap_output=True,
             outpath=data_dir)

# Clean original uv-data with common beam
clean_difmap(uv_fits_x, 'x_cc_same.fits', 'I', (1024, 0.1), path=uvdata_dir,
             path_to_script=path_to_script, show_difmap_output=True,
             outpath=data_dir)
ccimage_x = create_clean_image_from_fits_file(os.path.join(data_dir, 'x_cc.fits'))
clean_difmap(uv_fits_u, 'u_cc_same.fits', 'I', (1024, 0.1), path=uvdata_dir,
             path_to_script=path_to_script, show_difmap_output=True,
             outpath=data_dir, beam_restore=ccimage_x.beam)

u_model = create_model_from_fits_file(os.path.join(data_dir, 'u_cc.fits'))
x_model = create_model_from_fits_file(os.path.join(data_dir, 'x_cc.fits'))
u_uvdata = UVData(os.path.join(uvdata_dir, uv_fits_u))
x_uvdata = UVData(os.path.join(uvdata_dir, uv_fits_x))

# Bootstrap uv-data with original CLEAN models
xboot = CleanBootstrap([x_model], x_uvdata)
xboot.run(100, nonparametric=True, use_v=False, outname=['boot_x', '.fits'])
uboot = CleanBootstrap([u_model], u_uvdata)
uboot.run(100, nonparametric=True, use_v=False, outname=['boot_u', '.fits'])

# Clean bootstrapped uv-data with common parameters
x_boot_uvfits = sorted(glob.glob('boot_x_*.fits'))
u_boot_uvfits = sorted(glob.glob('boot_u_*.fits'))
for i, x_boot_uv in enumerate(x_boot_uvfits):
    clean_difmap(x_boot_uv, 'x_cc_same_{}.fits'.format(str(i+1).zfill(3)), 'I',
                 (1024, 0.1),
                 path_to_script=path_to_script, show_difmap_output=True,
Example #23
def create_sample(original_uv_fits_path,
                  original_cc_fits_path=None,
                  imsize=None,
                  outdir=None,
                  path_to_script=None,
                  n_sample=100,
                  stokes='I'):
    """
    Create `sample` from `true` or `model` source

    :param original_uv_fits_path:
        Path to original FITS-file with uv-data.
    :param original_cc_fits_path: (optional)
        Path to original FITS-file with CC model. If ``None`` then use
        ``imsize`` parameter to get `original` CC model from
        ``original_uv_fits_path``. (default: ``None``)
    :param imsize: (optional)
        Image parameters (image size [pix], pixel size [mas]) to use
        when doing first CC with ``original_cc_fits_path = None``. (default:
        ``None``)
    :param outdir: (optional)
        Directory to store intermediate results. If ``None`` then use CWD.
        (default: ``None``)
    :param path_to_script: (optional)
        Path to Dan Homan's script for final clean. If ``None`` then use CWD.
        (default: ``None``)
    :param n_sample: (optional)
        Number of `samples` from infinite population to consider in coverage
        analysis of intervals. Here `samples` - observations of known source
        with different realisations of noise with known parameters. (default:
         ``100``)
    :param stokes: (optional)
        Stokes parameter to use. (default: ``I``)

    :return:
        Creates FITS-files with uv-data and CLEAN models of `sample`.
    """

    # If not given `original` CLEAN model - get it by cleaning `original`
    # uv-data
    if original_cc_fits_path is None:
        print(
            "No `original` CLEAN model specified! Will CLEAN `original`"
            " uv-data.")
        if imsize is None:
            raise Exception("Specify ``imsize``")
        uv_fits_dir, uv_fits_fname = os.path.split(original_uv_fits_path)
        original_cc_fits_path = os.path.join(outdir, 'original_cc.fits')
        print(
            "Cleaning `original` uv-data to {}".format(original_cc_fits_path))
        clean_difmap(uv_fits_fname,
                     'original_cc.fits',
                     stokes,
                     imsize,
                     path=uv_fits_dir,
                     path_to_script=path_to_script,
                     outpath=outdir)

    original_uv_data = UVData(original_uv_fits_path)
    noise = original_uv_data.noise()
    original_model = create_model_from_fits_file(original_cc_fits_path)
    # Find images parameters for cleaning if necessary
    if imsize is None:
        print(
            "Getting image parameters from `original`"
            " CLEAN FITS file {}.".format(original_cc_fits_path))
        image_params = get_fits_image_info(original_cc_fits_path)
        imsize = (image_params['imsize'][0],
                  abs(image_params['pixsize'][0]) / mas_to_rad)

    # Substitute uv-data with original model and create `model` uv-data
    print("Substituting `original` uv-data with CLEAN model...")
    model_uv_data = copy.deepcopy(original_uv_data)
    model_uv_data.substitute([original_model])

    # Create `sample` uv-data
    # Add noise to `model` uv-data ``n_cov`` times and get ``n_cov`` `samples`
    # from population
    sample_uv_fits_paths = list()
    print("Creating {} `samples` from population".format(n_sample))
    for i in range(n_sample):
        sample_uv_data = copy.deepcopy(model_uv_data)
        sample_uv_data.noise_add(noise)
        sample_uv_fits_path = os.path.join(
            outdir, 'sample_uv_{}.uvf'.format(str(i + 1).zfill(3)))
        sample_uv_data.save(sample_uv_fits_path)
        sample_uv_fits_paths.append(sample_uv_fits_path)

    # Clean each `sample` FITS-file
    print("CLEANing `samples` uv-data")
    for uv_fits_path in sample_uv_fits_paths:
        uv_fits_dir, uv_fits_fname = os.path.split(uv_fits_path)
        j = uv_fits_fname.split('.')[0].split('_')[-1]
        print("Cleaning {} sample uv-data to"
              " {}".format(uv_fits_path,
                           os.path.join(outdir,
                                        'sample_cc_{}.fits'.format(j))))
        clean_difmap(uv_fits_fname,
                     'sample_cc_{}.fits'.format(j),
                     original_model.stokes,
                     imsize,
                     path=uv_fits_dir,
                     path_to_script=path_to_script,
                     outpath=outdir)

    sample_cc_fits_paths = sorted(
        glob.glob(os.path.join(outdir, 'sample_cc_*.fits')))
    sample_uv_fits_paths = sorted(
        glob.glob(os.path.join(outdir, 'sample_uv_*.uvf')))
    return sample_uv_fits_paths, sample_cc_fits_paths
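
A sketch of generating artificial `sample` data sets with the create_sample() function above, starting from an existing CLEAN model; the paths are hypothetical placeholders.

# Hypothetical call to create_sample(); paths are placeholders.
sample_uv_paths, sample_cc_paths = create_sample(
    '/data/source.uvf',
    original_cc_fits_path='/data/original_cc.fits',
    outdir='/tmp/sample',
    path_to_script='/path/to/final_clean_nw',
    n_sample=100)
print(len(sample_uv_paths), len(sample_cc_paths))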
Example #24
def fit_model_with_nestle(uv_fits, model_file, components_priors, outdir=None,
                          **nestle_kwargs):
    """
    :param uv_fits:
        Path to uv-fits file with self-calibrated visibilities.
    :param model_file:
        Path to file with difmap model.
    :param components_priors:
        Component priors' ppfs. The component closest to the phase center goes
        first. Iterable of dicts with parameter names as keys and
        (callable, args, kwargs,) as values, where args & kwargs are additional
        arguments to the callable. Each callable is called as
        callable.ppf(p, *args, **kwargs), so the callable must have a ``ppf``
        method.

        Example of prior on single component:
            {'flux': (scipy.stats.uniform.ppf, [0., 10.], dict(),),
             'bmaj': (scipy.stats.uniform.ppf, [0, 5.], dict(),),
             'e': (scipy.stats.beta.ppf, [alpha, beta], dict(),)}
        First key will result in calling: scipy.stats.uniform.ppf(u, 0, 10) as
        value from prior for ``flux`` parameter.
    :param outdir: (optional)
        Directory to output results. If ``None`` then use cwd. (default:
        ``None``)
    :param nestle_kwargs: (optional)
        Any arguments passed to ``nestle.sample`` function.

    :return:
        Results of ``nestle.sample`` work on that model.
    """
    if outdir is None:
        outdir = os.getcwd()

    mdl_file = model_file
    uv_data = UVData(uv_fits)
    mdl_dir, mdl_fname = os.path.split(mdl_file)
    comps = import_difmap_model(mdl_fname, mdl_dir)

    # Sort components by distance from phase center
    comps = sorted(comps, key=lambda x: np.sqrt(x.p[1]**2 + x.p[2]**2))

    ppfs = list()
    labels = list()
    for component_prior in components_priors:
        for comp_name in ('flux', 'x', 'y', 'bmaj', 'e', 'bpa'):
            try:
                ppfs.append(_function_wrapper(*component_prior[comp_name]))
                labels.append(comp_name)
            except KeyError:
                pass

    for ppf in ppfs:
        print(ppf.args)

    hypercube = hypercube_partial(ppfs)

    # Create model
    mdl = Model(stokes=stokes)
    # Add components to model
    mdl.add_components(*comps)
    loglike = LnLikelihood(uv_data, mdl)
    time0 = time.time()
    result = nestle.sample(loglikelihood=loglike, prior_transform=hypercube,
                           ndim=mdl.size, npoints=50, method='multi',
                           callback=nestle.print_progress, **nestle_kwargs)
    print("Time spent : {}".format(time.time()-time0))
    samples = nestle.resample_equal(result.samples, result.weights)
    # Save re-weighted samples from posterior to specified ``outdir``
    # directory
    np.savetxt(os.path.join(outdir, 'samples.txt'), samples)
    fig = corner.corner(samples, show_titles=True, labels=labels,
                        quantiles=[0.16, 0.5, 0.84], title_fmt='.3f')
    # Save corner plot of samples from posterior to specified ``outdir``
    # directory
    fig.savefig(os.path.join(outdir, "corner.png"), bbox_inches='tight',
                dpi=200)

    return result
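
The docstring above gives the expected structure of components_priors. A sketch of building such priors from scipy.stats ppfs and passing them to fit_model_with_nestle(); the UVFITS and model paths are hypothetical placeholders.

# Hypothetical priors and call for fit_model_with_nestle(); paths are placeholders.
import scipy.stats

components_priors = [
    {'flux': (scipy.stats.uniform.ppf, [0., 10.], dict()),
     'x': (scipy.stats.uniform.ppf, [-1., 2.], dict()),
     'y': (scipy.stats.uniform.ppf, [-1., 2.], dict()),
     'bmaj': (scipy.stats.uniform.ppf, [0., 5.], dict())},
    {'flux': (scipy.stats.uniform.ppf, [0., 3.], dict()),
     'x': (scipy.stats.uniform.ppf, [-5., 10.], dict()),
     'y': (scipy.stats.uniform.ppf, [-5., 10.], dict()),
     'bmaj': (scipy.stats.uniform.ppf, [0., 5.], dict())}]
result = fit_model_with_nestle('/data/source.uvf', '/data/initial.mdl',
                               components_priors, outdir='/tmp/nestle')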
Example #25
def create_sample(original_uv_fits,
                  original_mdl_file,
                  outdir=None,
                  n_sample=100,
                  stokes='I'):
    """
    Create `sample` from `true` or `model` source

    :param outdir: (optional)
        Directory to store intermediate results. If ``None`` then use CWD.
        (default: ``None``)
    :param n_sample: (optional)
        Number of `samples` from infinite population to consider in coverage
        analysis of intervals. Here `samples` - observations of known source
        with different realisations of noise with known parameters. (default:
         ``100``)
    :param stokes: (optional)
        Stokes parameter to use. (default: ``I``)
    """
    original_uv_data = UVData(original_uv_fits)
    noise = original_uv_data.noise()
    path, _ = os.path.split(original_mdl_file)
    comps = import_difmap_model(original_mdl_file, path)
    original_model = Model(stokes=stokes)
    original_model.add_components(*comps)

    # Substitute uv-data with original model and create `model` uv-data
    print("Substituting `original` uv-data with CLEAN model...")
    model_uv_data = copy.deepcopy(original_uv_data)
    model_uv_data.substitute([original_model])

    # Create `sample` uv-data
    # Add noise to `model` uv-data ``n_cov`` times and get ``n_cov`` `samples`
    # from population
    sample_uv_fits_paths = list()
    print("Creating {} `samples` from population".format(n_sample))
    for i in range(n_sample):
        sample_uv_data = copy.deepcopy(model_uv_data)
        sample_uv_data.noise_add(noise)
        sample_uv_fits_path = os.path.join(
            outdir, 'sample_uv_{}.uvf'.format(str(i + 1).zfill(3)))
        sample_uv_data.save(sample_uv_fits_path)
        sample_uv_fits_paths.append(sample_uv_fits_path)

    # Fitting in difmap each `sample` FITS-file
    print("Fitting `samples` uv-data")
    for uv_fits_path in sample_uv_fits_paths:
        uv_fits_dir, uv_fits_fname = os.path.split(uv_fits_path)
        j = uv_fits_fname.split('.')[0].split('_')[-1]
        print("Fitting {} sample uv-data to"
              " {}".format(
                  uv_fits_path,
                  os.path.join(outdir, 'sample_model_{}.mdl'.format(j))))
        modelfit_difmap(uv_fits_fname,
                        original_mdl_file,
                        'sample_model_{}.mdl'.format(j),
                        path=uv_fits_dir,
                        mdl_path=uv_fits_dir,
                        out_path=uv_fits_dir)

    sample_mdl_paths = sorted(
        glob.glob(os.path.join(outdir, 'sample_model_*.mdl')))
    sample_uv_fits_paths = sorted(
        glob.glob(os.path.join(outdir, 'sample_uv_*.uvf')))
    return sample_uv_fits_paths, sample_mdl_paths
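# Hypothetical usage sketch (not part of the original listing). The paths are
# placeholders; ``create_sample`` returns sorted lists of the `sample` uv-FITS
# files and of the corresponding fitted difmap model files.
sample_uv_fits_paths, sample_mdl_paths = create_sample(
    original_uv_fits='/path/to/original.uvf',
    original_mdl_file='/path/to/original.mdl',
    outdir='/path/to/workdir', n_sample=100, stokes='I')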
Exemple #26
class KFoldCV(object):
    def __init__(self,
                 uv_fits_path,
                 k,
                 basename='cv',
                 seed=None,
                 baselines=None,
                 stokes='I'):
        if stokes not in ('I', 'RR', 'LL'):
            raise Exception("Only stokes (I, RR, LL) supported!")
        self.stokes = stokes
        self.uv_fits_path = uv_fits_path
        self.uvdata = UVData(uv_fits_path)
        self.k = k
        self.seed = seed
        self.basename = basename
        self.test_fname_base = "{}_test".format(basename)
        self.train_fname_base = "{}_train".format(basename)
        self.baseline_folds = None
        self.create_folds(baselines)

    def create_folds(self, baselines=None):
        baseline_folds = dict()

        if baselines is None:
            baselines = self.uvdata.baselines

        if self.stokes == 'I':
            stokes = ['RR', 'LL']
            average_stokes = True
        elif self.stokes == 'RR':
            stokes = ['RR']
            average_stokes = False
        elif self.stokes == 'LL':
            stokes = ['LL']
            average_stokes = False
        else:
            raise Exception("Only stokes (I, RR, LL) supported!")

        for bl in baselines:
            bl_indxs = self.uvdata._indxs_baselines[bl]
            print("Baseline {} has {}"
                  " samples".format(bl, np.count_nonzero(bl_indxs)))
            bl_indxs_pw = self.uvdata.pw_indxs_baseline(
                bl,
                average_bands=True,
                stokes=stokes,
                average_stokes=average_stokes)
            bl_indxs = mask_boolean_with_boolean(bl_indxs, bl_indxs_pw)
            print("Baseline {} has {} samples with"
                  " positive weight".format(bl, np.count_nonzero(bl_indxs)))

            try:
                # ``random_state`` is only meaningful (and, in recent
                # scikit-learn versions, only allowed) when ``shuffle=True``
                kfold = KFold(self.k, shuffle=self.seed is not None,
                              random_state=self.seed)
                baseline_folds[bl] = list()
                # ``KFold.split`` expects an array-like of samples, not a count
                for train, test in kfold.split(np.nonzero(bl_indxs)[0]):
                    tr = to_boolean_array(
                        np.nonzero(bl_indxs)[0][train], len(bl_indxs))
                    te = to_boolean_array(
                        np.nonzero(bl_indxs)[0][test], len(bl_indxs))
                    baseline_folds[bl].append((tr, te))
            # When ``k`` is greater than the number of baseline samples
            except ValueError:
                pass

        # Add data of all remaining baselines without folding: everything goes
        # to train and nothing to test
        rest_baselines = list(self.uvdata.baselines)
        for bl in baselines:
            rest_baselines.remove(bl)
        for bl in rest_baselines:
            baseline_folds[bl] = list()
        for bl in rest_baselines:
            bl_indxs = self.uvdata._indxs_baselines[bl]
            for k in range(self.k):
                baseline_folds[bl].append(
                    (bl_indxs, np.zeros(len(bl_indxs), dtype=bool)))

        self.baseline_folds = baseline_folds

    def create_train_test_data(self, outdir=None):
        if outdir is None:
            outdir = os.getcwd()
        for i in range(self.k):
            train_indxs = np.zeros(len(self.uvdata.hdu.data), dtype=bool)
            test_indxs = np.zeros(len(self.uvdata.hdu.data), dtype=bool)
            for bl, kfolds in self.baseline_folds.items():
                itrain, itest = kfolds[i]
                # itrain = to_boolean_array(itrain)
                train_indxs = np.logical_or(train_indxs, itrain)
                test_indxs = np.logical_or(test_indxs, itest)
            train_data = self.uvdata.hdu.data[train_indxs]
            test_data = self.uvdata.hdu.data[test_indxs]
            self.uvdata.save(os.path.join(
                outdir, self.test_fname_base + '_{}.fits'.format(i)),
                             test_data,
                             rewrite=True)
            self.uvdata.save(os.path.join(
                outdir, self.train_fname_base + '_{}.fits'.format(i)),
                             train_data,
                             rewrite=True)

    def cv_score(self,
                 initial_dfm_model_path=None,
                 data_dir=None,
                 niter=100,
                 path_to_script=None,
                 mapsize_clean=None):
        if data_dir is None:
            data_dir = os.getcwd()
        train_uv_fits_paths = sorted(
            glob.glob(os.path.join(data_dir, self.train_fname_base + '*')))
        test_uv_fits_paths = sorted(
            glob.glob(os.path.join(data_dir, self.test_fname_base + '*')))
        cv_scores = list()
        train_scores = list()
        if initial_dfm_model_path is not None:
            for i, (train_uv_fits_path, test_uv_fits_path) in enumerate(
                    zip(train_uv_fits_paths, test_uv_fits_paths)):
                print("Calculating CV-score for {} of {} splits".format(
                    i + 1, self.k))
                print("Training FITS: {}".format(train_uv_fits_path))
                print("Testing FITS: {}".format(test_uv_fits_path))
                out_mdl_fname = 'train_{}.mdl'.format(i)
                dfm_model_dir, dfm_model_fname = os.path.split(
                    initial_dfm_model_path)
                modelfit_difmap(train_uv_fits_path,
                                dfm_model_fname,
                                out_mdl_fname,
                                niter=niter,
                                path=data_dir,
                                mdl_path=dfm_model_dir,
                                out_path=data_dir,
                                stokes=self.stokes,
                                show_difmap_output=True)
                cv_scores.append(
                    score(test_uv_fits_path,
                          os.path.join(data_dir, out_mdl_fname)))
                train_scores.append(
                    score(train_uv_fits_path,
                          os.path.join(data_dir, out_mdl_fname)))
        else:
            for i, (train_uv_fits_path, test_uv_fits_path) in enumerate(
                    zip(train_uv_fits_paths, test_uv_fits_paths)):
                out_mdl_fname = 'train_{}.fits'.format(i)
                # This branch is used when learning curves are created
                # clean_difmap(train_uv_fits_path, out_mdl_fname, 'I',
                #              mapsize_clean, data_dir, path_to_script,
                #              outpath=data_dir, show_difmap_output=True)
                # This branch is used when different numbers of iterations are
                # tested
                clean_n(
                    train_uv_fits_path,
                    out_mdl_fname,
                    'I',
                    mapsize_clean,
                    niter=niter,
                    path_to_script=path_to_script,
                    outpath=data_dir,
                    show_difmap_output=True,
                )
                cv_scores.append(
                    score(test_uv_fits_path,
                          os.path.join(data_dir, out_mdl_fname)))
                train_scores.append(
                    score(train_uv_fits_path,
                          os.path.join(data_dir, out_mdl_fname)))

        return cv_scores, train_scores
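# Hypothetical usage sketch (not part of the original listing): 5-fold
# cross-validation of a difmap model against one uv-FITS file. All paths are
# placeholders.
kfold = KFoldCV('/path/to/data.uvf', k=5, seed=42, stokes='I')
kfold.create_train_test_data(outdir='/path/to/workdir')
cv_scores, train_scores = kfold.cv_score(
    initial_dfm_model_path='/path/to/initial.mdl',
    data_dir='/path/to/workdir', niter=100)
print("CV score: {:.4f} +/- {:.4f}".format(np.mean(cv_scores), np.std(cv_scores)))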
Exemple #27
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

# Workflow for one source
source = '0945+408'
epoch = '2007_04_18'
band = 'u'
# TODO: standardize this
image_fname = 'original_cc.fits'
uv_fname_cc = '0945+408.u.2007_04_18.uvf'
uv_fname_uv = '0945+408.u.2007_04_18.uvf'
dfm_model_fname = 'dfmp_original_model.mdl'

comps = import_difmap_model(dfm_model_fname, base_path)
model_uv = Model(stokes='I')
model_uv.add_components(*comps)
uvdata = UVData(os.path.join(base_path, uv_fname_uv))
uvdata_m = UVData(os.path.join(base_path, uv_fname_uv))
uvdata_m.substitute([model_uv])
uvdata_r = uvdata - uvdata_m

# Plot uv-data
label_size = 12
matplotlib.rcParams['xtick.labelsize'] = label_size
matplotlib.rcParams['ytick.labelsize'] = label_size
uvdata.uvplot(style='re&im', freq_average=True)
# Save before showing so the figure is not empty in non-interactive use
matplotlib.pyplot.savefig('/home/ilya/sandbox/heteroboot/uvdata_original.png',
                          bbox_inches='tight', dpi=400)
matplotlib.pyplot.show()
matplotlib.pyplot.close()

# # Plot residuals in radplot
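# Hypothetical sketch of the truncated step above (not part of the original
# listing): plot the residual uv-data ``uvdata_r`` the same way as the
# original data and save the figure; the output path is a placeholder.
uvdata_r.uvplot(style='re&im', freq_average=True)
matplotlib.pyplot.savefig('/home/ilya/sandbox/heteroboot/uvdata_residuals.png',
                          bbox_inches='tight', dpi=400)
matplotlib.pyplot.close()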
Exemple #28
cv_scores = list()
train_scores = list()
for i, fname in enumerate(['1IF.fits', '12IF.fits', '123IF.fits', '1234IF.fits',
                          '12345IF.fits', '123456IF.fits', '1234567IF.fits']):
    current_fits = os.path.join(data_dir, fname)
    modelfit_difmap(current_fits,
                    original_model_fname, 'out_{}.mdl'.format(i),
                    path=data_dir, mdl_path=data_dir,
                    out_path=data_dir, niter=100)
    comps = import_difmap_model('out_{}.mdl'.format(i), data_dir)
    model = Model(stokes='I')
    model.add_components(*comps)

    # Calculate performance on training data
    uvdata_train_model = UVData(current_fits)
    uvdata_train = UVData(current_fits)
    uvdata_train_model.substitute([model])
    uvdata_diff_train = uvdata_train - uvdata_train_model
    factor = np.count_nonzero(~uvdata_diff_train.uvdata_weight_masked.mask[:, :, :2])
    squared_diff = uvdata_diff_train.uvdata_weight_masked[:, :, :2] *\
                   uvdata_diff_train.uvdata_weight_masked[:, :, :2].conj()
    score = float(np.sum(squared_diff).real) / factor
    train_scores.append(score)


    # Calculate performance on test data
    uvdata_test_model = UVData(os.path.join(data_dir, '8IF.fits'))
    uvdata_test = UVData(os.path.join(data_dir, '8IF.fits'))
    uvdata_test_model.substitute([model])
    uvdata_diff_test = uvdata_test - uvdata_test_model
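    # The listing appears truncated here. A plausible completion (hypothetical,
    # not from the original), mirroring the training-score computation above:
    factor = np.count_nonzero(~uvdata_diff_test.uvdata_weight_masked.mask[:, :, :2])
    squared_diff = uvdata_diff_test.uvdata_weight_masked[:, :, :2] *\
                   uvdata_diff_test.uvdata_weight_masked[:, :, :2].conj()
    score = float(np.sum(squared_diff).real) / factor
    cv_scores.append(score)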
Exemple #29
                        download_dir=data_dir)
# Fetch model file
get_mojave_mdl_file(tsv_table, source, epoch, outdir=data_dir)
# Clean uv-fits
clean_difmap(uv_fits,
             'cc.fits',
             'I', [1024, 0.1],
             path=data_dir,
             path_to_script=path_to_script,
             outpath=data_dir)

# Create clean image instance
cc_image = create_clean_image_from_fits_file(os.path.join(data_dir, 'cc.fits'))
comps = import_difmap_model(mdl_fname, data_dir)
model = Model(stokes='I')
model.add_components(*comps)

# Check that model fits UV-data well
uv_data = UVData(os.path.join(data_dir, uv_fits))
uv_data.uvplot()
mdl_data = copy.deepcopy(uv_data)
mdl_data.substitute([model])
mdl_data.uvplot(sym='.r')

cc_image_ = copy.deepcopy(cc_image)
cc_image_._image = np.zeros(cc_image._image.shape, dtype=float)
cc_image_.add_model(model)
plt.figure()
plt.matshow(cc_image_.cc_image - cc_image.cc_image)
plt.colorbar()
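# Hypothetical follow-up (not part of the original listing): a quick numeric
# check of the agreement between the model-only image and the CLEAN image.
residual = cc_image_.cc_image - cc_image.cc_image
print("Max. |residual|: {:.2e}".format(np.abs(residual).max()))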
Exemple #30
    components_priors.append({'flux': (sp.stats.uniform.ppf, [0, 1], {}),
                              'x': (sp.stats.uniform.ppf, [-2, 4], {}),
                              'y': (sp.stats.uniform.ppf, [-2, 4], {}),
                              'bmaj': (sp.stats.uniform.ppf, [0, 1], {})})
    components_priors.append({'flux': (sp.stats.uniform.ppf, [0, 1], {}),
                              'x': (sp.stats.uniform.ppf, [-5, 10], {}),
                              'y': (sp.stats.uniform.ppf, [-5, 10], {}),
                              'bmaj': (sp.stats.uniform.ppf, [0, 2], {})})
    components_priors.append({'flux': (sp.stats.uniform.ppf, [0, 1], {}),
                              'x': (sp.stats.uniform.ppf, [-6, 12], {}),
                              'y': (sp.stats.uniform.ppf, [-6, 12], {}),
                              'bmaj': (sp.stats.uniform.ppf, [0, 3], {})})
    results = fit_model_with_nestle(uv_fits, mdl_file, components_priors,
                                    outdir=outdir)


data_dir = '/home/ilya/code/vlbi_errors/silke'
# uv_fits = '0851+202.u.2012_11_11.uvf'
uv_fits = '0851+202.u.2004_11_05.uvf'
# mdl_fname = '2.mod.2012_11_11'
mdl_fname = '1.mod.2004_11_05'
uv_data = UVData(os.path.join(data_dir, uv_fits))
comps = import_difmap_model(mdl_fname, data_dir)
model = Model(stokes='I')
model.add_components(*comps)

fig = uv_data.uvplot(style='a&p')
uv_data.substitute([model])
uv_data.uvplot(color='r', fig=fig, phase_range=[-0.2, 0.2])