Example 1
# Assumed imports for this snippet: numpy, emcee's sampler, and fitsio's FITS
# class (aliased fFITS); resume_file and initiate_file are helpers from the
# surrounding module and are not shown here.
import numpy as np
from emcee import EnsembleSampler
from fitsio import FITS as fFITS


def run_emcee(x, lnprob, args, nwalkers, nruns, fudge, chain_name, burns,
              pool=None,
              nthreads=1,
              namearray=None,
              resume=False,
              w=False):

    namearray = namearray if namearray is not None else []
    ndim = len(x)

    p0 = []

    if resume:
        p0, ndone = resume_file(chain_name, ndim, nwalkers)
        nruns -= ndone
        n = (ndone + burns) // nwalkers
    else:
        for i in range(0, nwalkers):
            shuffle = (10 ** (fudge * (np.random.rand(ndim) - 0.5)))
            p0 += [list(shuffle * x)]
        initiate_file(chain_name, ndim, blob_list=namearray, w=w)
        n = 0

    iterations = int(nruns / nwalkers)

    if pool is not None:
        sampler = EnsembleSampler(nwalkers, ndim, lnprob, args=args, pool=pool)
    else:
        sampler = EnsembleSampler(nwalkers, ndim, lnprob,
                                  args=args,
                                  threads=nthreads)

    for result in sampler.sample(p0, iterations=iterations, storechain=False):
        n += 1
        if n > burns // nwalkers:
            position = result[0]
            logl = result[1]
            with fFITS(chain_name, 'rw') as fits:
                for k in range(position.shape[0]):
                    output = {
                        'lp': np.array([logl[k]]),
                        'x': np.array([position[k]])
                    }
                    for i in range(0, len(namearray)):
                        blob = result[3][k][i]
                        output[namearray[i]] = np.array([blob])
                    if np.isfinite(logl[k]):
                        fits['MCMC'].append(output)
    if pool is not None:
        pool.close()
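
A minimal usage sketch for the helper above, assuming a toy Gaussian log-probability; run_emcee's file helpers (initiate_file, resume_file) belong to the surrounding module and are not shown.

import numpy as np

def lnprob(theta, mu, icov):
    # Toy multivariate-Gaussian log-probability (illustration only)
    d = theta - mu
    return -0.5 * d @ icov @ d

x_start = np.array([1.0, 2.0])
run_emcee(x_start, lnprob, args=(np.zeros(2), np.eye(2)), nwalkers=32,
          nruns=32000, fudge=0.5, chain_name='chain.fits', burns=3200)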
Example 2
def runSampler(niters=400000,
               thin=400,
               newData=True,
               filename="./recepmod/data/test_chain.h5",
               npar=36):
    """ Run the sampling. """
    import numpy as np
    from emcee import EnsembleSampler
    from .StoneModel import StoneModel
    from tqdm import tqdm

    # Load model
    StoneM = StoneModel(newData)

    # Get uniform distribution of positions for start
    p0, ndims, nwalkers = getUniformStart(StoneM)

    # Set up sampler
    sampler = EnsembleSampler(nwalkers,
                              ndims,
                              StoneM.NormalErrorCoef,
                              threads=npar)

    if filename is not None:
        f, dset = startH5File(StoneM, filename)

    # Setup thinning tracking
    thinTrack = -thin

    for p, lnprob, _ in tqdm(sampler.sample(p0,
                                            iterations=niters,
                                            storechain=False),
                             total=niters):
        if thinTrack < thin:
            thinTrack += 1
        else:
            matOut = np.concatenate(
                (lnprob.reshape(nwalkers, 1), np.arange(0, nwalkers).reshape(
                    nwalkers, 1), p.reshape(nwalkers, ndims)),
                axis=1)

            if filename is not None:
                fShape = dset.shape
                dset.resize((fShape[0] + np.shape(matOut)[0], fShape[1]))
                dset[fShape[0]:, :] = matOut
                f.flush()

            thinTrack = 1
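
The chain rows written above are (lnprob, walker index, parameters); here is a sketch of reading them back with h5py, assuming the dataset created by startH5File (not shown) is named 'samples':

import h5py

with h5py.File("./recepmod/data/test_chain.h5", "r") as f:
    chain = f['samples'][:]           # hypothetical dataset name, set in startH5File
    lnprob = chain[:, 0]              # column 0: log-probability
    walker = chain[:, 1].astype(int)  # column 1: walker index
    params = chain[:, 2:]             # remaining columns: parameter vector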
Example 3
# Assumed preamble (the original snippet begins mid-call): build the sampler
# used below from the run-parameter dict `rp`.
import numpy as np
from emcee import EnsembleSampler

esampler = EnsembleSampler(rp['nwalkers'],
                           model.ndim,
                           lnpostfn,
                           kwargs=postkwargs)

# Initial center for the walkers
initial = [0.5, 0.2, 800, 100]
# give the walkers initial parameter positions with 10% dispersion
initial = [
    np.random.normal(loc=i, scale=0.1 * i, size=(rp['nwalkers']))
    for i in initial
]
initial = np.array(initial).T

# Now iterate the sampler
for i, result in enumerate(
        esampler.sample(initial, iterations=rp['niter'], storechain=True)):

    if (i % 10) == 0:
        print(i)

# Write out some statistics from the last half of the chains, and plot the
# walker evolution
half = int(max(rp['niter'] // 2, 100))
import matplotlib.pyplot as pl
for (n, c) in zip(model.theta_names, esampler.chain.T):
    print('{}: mean={}, rms={}'.format(n, c[half:, :].mean(),
                                       c[half:, :].std()))
    fig, ax = pl.subplots()
    for i in range(rp['nwalkers']):
        ax.plot(c[:, i])
    ax.set_ylabel(n)
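
A follow-up that flattens the retained samples into posterior percentiles; this assumes the emcee 2.x chain layout (nwalkers, niter, ndim) used above:

flat = esampler.chain[:, half:, :].reshape((-1, esampler.chain.shape[-1]))
for n, col in zip(model.theta_names, flat.T):
    lo, med, hi = np.percentile(col, [16, 50, 84])
    print('{}: {:.3f} (+{:.3f}/-{:.3f})'.format(n, med, hi - med, med - lo))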
Example 4
#print("Mean autocorrelation time: {0:.3f} steps".format(np.mean(s.get_autocorr_time())))

#
# Convergence-based MCMC (`s` is an emcee 3.x EnsembleSampler and params0
# the initial walker positions, both defined earlier in the module)
#
max_n = 1000000

# We'll track how the average autocorrelation time estimate changes
index = 0
autocorr = np.empty(max_n)

# This will be useful for testing convergence
old_tau = np.inf

# Now we'll sample for up to max_n steps
for sample in s.sample(params0, iterations=max_n, progress=True):
    # Only check convergence every 10000 steps
    if s.iteration % 10000:
        continue

    # Compute the autocorrelation time so far
    # Using tol=0 means that we'll always get an estimate even
    # if it isn't trustworthy
    tau = s.get_autocorr_time(tol=0)
    autocorr[index] = np.mean(tau)
    index += 1

    # Check convergence
    converged = np.all(tau * 100 < s.iteration)
    converged &= np.all(np.abs(old_tau - tau) / tau < 0.01)
    if converged:
        break
    old_tau = tau
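
Once the loop exits, the saved per-check estimates can be plotted to see whether tau has flattened out; a short sketch assuming matplotlib:

import matplotlib.pyplot as plt

steps = 10000 * np.arange(1, index + 1)   # one estimate every 10000 steps
plt.plot(steps, autocorr[:index])
plt.xlabel('number of steps')
plt.ylabel('mean autocorrelation time estimate')
plt.show()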
Example 5
class OCLBaseLPF(BaseLPF):
    def __init__(self, target: str, passbands: list, times: list = None, fluxes: list = None, errors: list = None,
                 pbids: list = None, covariates: list = None, nsamples: tuple = None, exptimes: tuple = None,
                 klims: tuple = (0.01, 0.75), nk: int = 512, nz: int = 512, cl_ctx=None, cl_queue=None, **kwargs):

        # The OpenCL transit model must exist before its context and queue
        # can be used as the defaults.
        tm = QuadraticModelCL(klims=klims, nk=nk, nz=nz, cl_ctx=cl_ctx, cl_queue=cl_queue)
        self.cl_ctx = cl_ctx or tm.ctx
        self.cl_queue = cl_queue or tm.queue
        self.cl_lnl_chunks = kwargs.get('cl_lnl_chunks', 1)
        super().__init__(target, passbands, times, fluxes, errors, pbids, covariates, None, nsamples, exptimes)

        self.tm = tm
        self.tm.set_data(self.timea, self.lcids, self.pbids, self.nsamples, self.exptimes)

        src = """
           __kernel void lnl2d(const int nlc, __global const float *obs, __global const float *mod, __global const float *err, __global const int *lcids, __global float *lnl2d){
                  uint i_tm = get_global_id(1);    // time vector index
                  uint n_tm = get_global_size(1);  // time vector size
                  uint i_pv = get_global_id(0);    // parameter vector index
                  uint n_pv = get_global_size(0);  // parameter vector population size
                  uint gid = i_pv*n_tm + i_tm;     // global linear index
                  float e = err[i_pv*nlc + lcids[i_tm]];
                  lnl2d[gid] = -log(e) - 0.5f*log(2*M_PI_F) - 0.5f*pown((obs[i_tm]-mod[gid]) / e, 2);
            }

            __kernel void lnl1d(const uint npt, __global float *lnl2d, __global float *lnl1d){
                  uint i_pv = get_global_id(0);    // parameter vector index
                  uint n_pv = get_global_size(0);  // parameter vector population size
            
                int i;
                bool is_even;
                uint midpoint = npt;
                __global float *lnl = &lnl2d[i_pv*npt];
                
                while(midpoint > 1){
                    is_even = midpoint % 2 == 0;
                    if (!is_even){  // odd length: fold the stray last element into lnl[0]
                        lnl[0] += lnl[midpoint-1];
                    }
                    midpoint /= 2;
                    
                    for(i=0; i<midpoint; i++){
                        lnl[i] = lnl[i] + lnl[midpoint+i];
                    }
                }
                lnl1d[i_pv] = lnl[0];
            }
            
            __kernel void lnl1d_chunked(const uint npt, __global float *lnl2d, __global float *lnl1d){
                uint ipv = get_global_id(0);    // parameter vector index
                uint npv = get_global_size(0);  // parameter vector population size
                uint ibl = get_global_id(1);    // block index
                uint nbl = get_global_size(1);  // number of blocks
                uint lnp = npt / nbl;
                  
                __global float *lnl = &lnl2d[ipv*npt + ibl*lnp];
              
                if(ibl == nbl-1){
                    lnp = npt - (ibl*lnp);
                }
            
                prefetch(lnl, lnp);
                bool is_even;
                uint midpoint = lnp;
                while(midpoint > 1){
                    is_even = midpoint % 2 == 0;
                    if (!is_even){  // odd length: fold the stray last element into lnl[0]
                        lnl[0] += lnl[midpoint-1];
                    }
                    midpoint /= 2;
            
                    for(int i=0; i<midpoint; i++){
                        lnl[i] = lnl[i] + lnl[midpoint+i];
                    }
                }
                lnl1d[ipv*nbl + ibl] = lnl[0];
            }
        """
        self.prg_lnl = cl.Program(self.cl_ctx, src).build()
        self.lnlikelihood = self.lnlikelihood_ocl


    def _init_data(self, times, fluxes, pbids, covariates=None, errors=None, nsamples=None, exptimes=None):
        super()._init_data(times, fluxes, pbids, covariates, errors, nsamples, exptimes)
        self.nlc = int32(self.nlc)

        # Initialise the Python arrays
        # ----------------------------
        self.timea = self.timea.astype('f')
        self.ofluxa = self.ofluxa.astype('f')
        self.lnl2d = zeros([50, self.ofluxa.size], 'f')
        self.lnl1d = zeros([self.lnl2d.shape[0], self.cl_lnl_chunks], 'f')
        self.ferr = zeros([50, self.nlc])
        self.lcids = self.lcids.astype('int32')
        self.pbids = self.pbids.astype('int32')
        if covariates is not None:
            self.cova = self.cova.astype('f')

        # Initialise OpenCL buffers
        # -------------------------
        mf = cl.mem_flags
        self._b_flux = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.ofluxa)
        self._b_err = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.ferr)
        self._b_lnl2d = cl.Buffer(self.cl_ctx, mf.WRITE_ONLY, self.lnl2d.nbytes)
        self._b_lnl1d = cl.Buffer(self.cl_ctx, mf.WRITE_ONLY, self.lnl1d.nbytes)
        self._b_lcids = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.lcids)
        if covariates is not None:
            self._b_covariates = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.cova)

    def transit_model(self, pvp, copy=False):
        pvp = atleast_2d(pvp)
        pvp_t = zeros([pvp.shape[0], 8], "f")
        uv = zeros([pvp.shape[0], 2], "f")
        pvp_t[:, 0:1] = sqrt(pvp[:, self._pid_k2])  # Radius ratio
        pvp_t[:, 1:3] = pvp[:, 0:2]                 # Transit centre and orbital period
        pvp_t[:, 3] = a = as_from_rhop(pvp[:, 2], pvp[:, 1])
        pvp_t[:, 4] = i_from_ba(pvp[:, 3], a)
        a, b = sqrt(pvp[:, self._sl_ld][:, 0]), 2. * pvp[:, self._sl_ld][:, 1]
        uv[:, 0] = a * b
        uv[:, 1] = a * (1. - b)
        flux = self.tm.evaluate_t_pv2d(pvp_t, uv, copy=copy)
        return flux if copy else None

    def flux_model(self, pvp):
        return self.transit_model(pvp, copy=True).astype('d')

    def _lnl2d(self, pv):
        if self.lnl2d.shape[0] != pv.shape[0] or self.lnl1d.shape[0] != pv.shape[0]:
            self.err = zeros([pv.shape[0], self.nlc], 'f')
            self._b_err.release()
            self._b_err = cl.Buffer(self.cl_ctx, cl.mem_flags.WRITE_ONLY, self.err.nbytes)
            self.lnl2d = zeros([pv.shape[0], self.ofluxa.size], 'f')
            self._b_lnl2d.release()
            self._b_lnl2d = cl.Buffer(self.cl_ctx, cl.mem_flags.WRITE_ONLY, self.lnl2d.nbytes)
            self.lnl1d = zeros([pv.shape[0], self.cl_lnl_chunks], 'f')
            if self._b_lnl1d:
                self._b_lnl1d.release()
            self._b_lnl1d = cl.Buffer(self.cl_ctx, cl.mem_flags.WRITE_ONLY, self.lnl1d.nbytes)
        self.transit_model(pv)
        cl.enqueue_copy(self.cl_queue, self._b_err, (10 ** pv[:, self._sl_err]).astype('f'))
        self.prg_lnl.lnl2d(self.cl_queue, self.tm.f.shape, None, self.nlc, self._b_flux, self.tm._b_f,
                           self._b_err, self._b_lcids, self._b_lnl2d)


    def lnlikelihood_numba(self, pv):
        self._lnl2d(pv)
        cl.enqueue_copy(self.cl_queue, self.lnl2d, self._b_lnl2d)
        lnl = psum2d(self.lnl2d)
        return where(isfinite(lnl), lnl, -inf)


    def lnlikelihood_ocl(self, pv):
        self._lnl2d(pv)
        self.prg_lnl.lnl1d_chunked(self.cl_queue, [self.lnl2d.shape[0], self.cl_lnl_chunks], None,
                                   uint32(self.lnl2d.shape[1]), self._b_lnl2d, self._b_lnl1d)
        cl.enqueue_copy(self.cl_queue, self.lnl1d, self._b_lnl1d)
        lnl = self.lnl1d.astype('d').sum(1)
        return lnl

    def lnlikelihood_numpy(self, pv):
        self._lnl2d(pv)
        cl.enqueue_copy(self.cl_queue, self.lnl2d, self._b_lnl2d)
        lnl = self.lnl2d.astype('d').sum(1)
        return where(isfinite(lnl), lnl, -inf)

    def lnprior(self, pv):
        lnpriors = zeros(pv.shape[0])
        for i, p in enumerate(self.ps.priors):
            lnpriors += p.logpdf(pv[:, i])
        return lnpriors + self.additional_priors(pv)

    def lnposterior(self, pv):
        lnp = self.lnlikelihood(pv) + self.lnprior(pv)
        return where(isfinite(lnp), lnp, -inf)

    def optimize_global(self, niter=200, npop=50, population=None, label='Global optimisation', leave=False):
        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop, maximize=True, vectorize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave):
            pass

    def sample_mcmc(self, niter=500, thin=5, label='MCMC sampling', reset=False, leave=True):
        if not with_emcee:
            raise ImportError('Emcee not installed.')
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par, self.lnposterior, vectorize=True)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter, desc=label, leave=False):
            pass


    def remove_outliers(self, sigma=5):
        fmodel = self.flux_model(self.de.minimum_location)[0]
        times, fluxes, pbids, errors = [], [], [], []
        for i in range(len(self.times)):
            # fmodel is a single concatenated array, so slice out this light curve
            res = self.fluxes[i] - fmodel[self.lcslices[i]]
            mask = ~sigma_clip(res, sigma=sigma).mask
            times.append(self.times[i][mask])
            fluxes.append(self.fluxes[i][mask])
            if self.errors is not None:
                errors.append(self.errors[i][mask])
        self._init_data(times, fluxes, self.pbids, (errors if self.errors is not None else None))
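
The folding reduction in the lnl1d kernels above is easier to follow on the host; this NumPy sketch implements the same halving loop:

import numpy as np

def folded_sum(a):
    # Same algorithm as the lnl1d kernel: halve the array repeatedly,
    # folding the stray last element into a[0] when the length is odd.
    a = a.astype(float).copy()
    midpoint = len(a)
    while midpoint > 1:
        if midpoint % 2 == 1:
            a[0] += a[midpoint - 1]
        midpoint //= 2
        a[:midpoint] += a[midpoint:2 * midpoint]
    return a[0]

x = np.random.rand(1001)
assert np.isclose(folded_sum(x), x.sum())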
Example 6
class LPFunction(object):
    """A basic log posterior function class.
    """
    def __init__(self, time, flux, nthreads=1):

        # Set up the transit model
        # ------------------------
        self.tm = MA(interpolate=True, klims=(0.08, 0.13), nthr=nthreads)
        self.nthr = nthreads

        # Initialise data
        # ---------------
        self.time = time.copy() if time is not None else array([])
        self.flux_o = flux.copy() if flux is not None else array([])
        self.npt = self.time.size

        # Set the optimiser and the MCMC sampler
        # --------------------------------------
        self.de = None
        self.sampler = None

        # Set up the parametrisation and priors
        # -------------------------------------
        psystem = [
            GParameter('tc', 'zero_epoch', 'd', NP(1.01, 0.02), (-inf, inf)),
            GParameter('pr', 'period', 'd', NP(2.50, 1e-7), (0, inf)),
            GParameter('rho', 'stellar_density', 'g/cm^3', UP(0.90, 2.50),
                       (0.90, 2.5)),
            GParameter('b', 'impact_parameter', 'R_s', UP(0.00, 1.00),
                       (0.00, 1.0)),
            GParameter('k2', 'area_ratio', 'A_s', UP(0.08**2, 0.13**2),
                       (1e-8, inf))
        ]

        pld = [
            PParameter('q1', 'q1_coefficient', '', UP(0, 1), bounds=(0, 1)),
            PParameter('q2', 'q2_coefficient', '', UP(0, 1), bounds=(0, 1))
        ]

        pbl = [
            LParameter('es',
                       'white_noise',
                       '',
                       UP(1e-6, 1e-2),
                       bounds=(1e-6, 1e-2))
        ]
        per = [
            LParameter('bl',
                       'baseline',
                       '',
                       NP(1.00, 0.001),
                       bounds=(0.8, 1.2))
        ]

        self.ps = ParameterSet()
        self.ps.add_global_block('system', psystem)
        self.ps.add_passband_block('ldc', 2, 1, pld)
        self.ps.add_lightcurve_block('baseline', 1, 1, pbl)
        self.ps.add_lightcurve_block('error', 1, 1, per)
        self.ps.freeze()

    def compute_baseline(self, pv):
        """Constant baseline model"""
        return full_like(self.flux_o, pv[8])

    def compute_transit(self, pv):
        """Transit model"""
        _a = as_from_rhop(pv[2], pv[1])  # Scaled semi-major axis from stellar density and orbital period
        _i = mt.acos(pv[3] / _a)         # Inclination from impact parameter and semi-major axis
        _k = mt.sqrt(pv[4])              # Radius ratio from area ratio

        a, b = mt.sqrt(pv[5]), 2 * pv[6]
        _uv = array([a * b, a * (1. - b)])  # Quadratic limb darkening coefficients

        return self.tm.evaluate(self.time, _k, _uv, pv[0], pv[1], _a, _i)

    def compute_lc_model(self, pv):
        """Combined baseline and transit model"""
        return self.compute_baseline(pv) * self.compute_transit(pv)

    def lnprior(self, pv):
        """Log prior"""
        if any(pv < self.ps.lbounds) or any(pv > self.ps.ubounds):
            return -inf
        else:
            return self.ps.lnprior(pv)

    def lnlikelihood(self, pv):
        """Log likelihood"""
        flux_m = self.compute_lc_model(pv)
        return ll_normal_es(self.flux_o, flux_m, pv[7])

    def lnposterior(self, pv):
        """Log posterior"""
        lnprior = self.lnprior(pv)
        if isinf(lnprior):
            return lnprior
        else:
            return lnprior + self.lnlikelihood(pv)

    def create_pv_population(self, npop=50):
        return self.ps.sample_from_prior(npop)

    def optimize(self,
                 niter=200,
                 npop=50,
                 population=None,
                 label='Optimisation'):
        """Global optimisation using Differential evolution"""
        if self.de is None:
            self.de = DiffEvol(self.lnposterior,
                               clip(self.ps.bounds, -1, 1),
                               npop,
                               maximize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label):
            pass

    def sample(self, niter=500, thin=5, label='MCMC sampling', reset=False):
        """MCMC sampling using emcee"""
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par,
                                           self.lnposterior)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                      total=niter,
                      desc=label):
            pass
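
A hypothetical end-to-end run of the class above, assuming time and flux arrays are already in hand:

lpf = LPFunction(time, flux, nthreads=4)
lpf.optimize(niter=200, npop=50)   # differential-evolution global optimisation
lpf.sample(niter=500, thin=5)      # emcee sampling seeded from the DE population
chain = lpf.sampler.chain          # (nwalkers, nsteps, ndim)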
Example 7
class BaseLPF:
    _lpf_name = 'BaseLPF'

    def __init__(self, name: str, passbands: list, times: list = None, fluxes: list = None, errors: list = None,
                 pbids: list = None, covariates: list = None, wnids: list = None, tm: TransitModel = None,
                 nsamples: tuple = 1, exptimes: tuple = 0., init_data=True, result_dir: Path = None):
        """The base Log Posterior Function class.

        The `BaseLPF` class creates the basis for transit light curve analyses using `PyTransit`. This class can be
        used in a basic analysis directly, or it can be inherited to create a basis for a more complex analysis.

        Parameters
        ----------
        name: str
            Name of the log posterior function instance.

        passbands: iterable
            List of unique passband names (filters) that the light curves have been observed in.

        times: iterable
            List of 1d ndarrays each containing the mid-observation times for a single light curve.

        fluxes: iterable
            List of 1d ndarrays each containing the normalized fluxes for a single light curve.

        errors: iterable
            List of 1d ndarrays each containing the flux measurement uncertainties for a single light curve.

        pbids: iterable of ints
            List of passband indices mapping each light curve to a single passband.

        covariates: iterable
            List of covariates, one 2d ndarray per light curve.

        wnids: iterable of ints
            List of noise set indices mapping each light curve to a single noise set.

        tm: TransitModel
            Transit model to use instead of the default model.

        nsamples: list[int]
            List of supersampling factors.  The values should be integers and given one per light curve.

        exptimes: list[float]
            List of exposure times. The values should be floats with the time given in days.

        init_data: bool
            Set to `False` to allow the LPF to be initialized without data. This is mainly for debugging.

        result_dir: Path
            Default saving directory
        """

        self._pre_initialisation()

        self.tm = tm or QuadraticModel(klims=(0.01, 0.75), nk=512, nz=512)

        # LPF name
        # --------
        self.name = name
        self.result_dir = result_dir

        # Passbands
        # ---------
        # Should be arranged from blue to red
        if isinstance(passbands, (list, tuple, ndarray)):
            self.passbands = passbands
        else:
            self.passbands = [passbands]
        self.npb = npb = len(self.passbands)

        self.nsamples = None
        self.exptimes = None

        # Declare high-level objects
        # --------------------------
        self.ps = None          # Parametrisation
        self.de = None          # Differential evolution optimiser
        self.sampler = None     # MCMC sampler
        self.instrument = None  # Instrument
        self.ldsc = None        # Limb darkening set creator
        self.ldps = None        # Limb darkening profile set
        self.cntm = None        # Contamination model

        # Declare data arrays and variables
        # ---------------------------------
        self.nlc: int = 0                # Number of light curves
        self.n_noise_blocks: int = 0     # Number of noise blocks
        self.noise_ids = None
        self.times: list = None          # List of time arrays
        self.fluxes: list = None         # List of flux arrays
        self.errors: list = None         # List of flux uncertainties
        self.covariates: list = None     # List of covariates
        self.wn: ndarray = None          # Array of white noise estimates for each light curve
        self.timea: ndarray = None       # Array of concatenated times
        self.mfluxa: ndarray = None      # Array of concatenated model fluxes
        self.ofluxa: ndarray = None      # Array of concatenated observed fluxes
        self.errora: ndarray = None      # Array of concatenated flux uncertainties

        self.lcids: ndarray = None       # Array of light curve indices for each datapoint
        self.pbids: ndarray = None       # Array of passband indices for each light curve
        self.lcslices: list = None       # List of light curve slices

        self._local_minimization = None

        # Initialise the additional lnprior list
        # --------------------------------------
        self.lnpriors = []

        if init_data:
            # Set up the observation data
            # ---------------------------
            self._init_data(times = times, fluxes = fluxes, pbids = pbids, covariates = covariates,
                            errors = errors, wnids = wnids, nsamples = nsamples, exptimes = exptimes)

            # Set up the parametrisation
            # --------------------------
            self._init_parameters()

            # Initialise the instrument
            # --------------------------
            self._init_instrument()

        self._post_initialisation()


    def _init_data(self, times, fluxes, pbids=None, covariates=None, errors=None, wnids = None, nsamples=1, exptimes=0.):

        if isinstance(times, ndarray) and times.ndim == 1 and times.dtype == float:
            times = [times]

        if isinstance(fluxes, ndarray) and fluxes.ndim == 1 and fluxes.dtype == float:
            fluxes = [fluxes]

        if pbids is None:
            if self.pbids is None:
                self.pbids = zeros(len(fluxes), int)
        else:
            self.pbids = atleast_1d(pbids).astype('int')

        self.nlc = len(times)
        self.times = times
        self.fluxes = fluxes
        self.wn = [nanstd(diff(f)) / sqrt(2) for f in fluxes]
        self.timea = concatenate(self.times)
        self.ofluxa = concatenate(self.fluxes)
        self.mfluxa = zeros_like(self.ofluxa)
        self.lcids = concatenate([full(t.size, i) for i, t in enumerate(self.times)])

        # TODO: Noise IDs get scrambled when removing transits, fix!!!
        if wnids is None:
            if self.noise_ids is None:
                self.noise_ids = zeros(self.nlc, int)
                self.n_noise_blocks = 1
        else:
            self.noise_ids = asarray(wnids)
            self.n_noise_blocks = len(unique(self.noise_ids))
            assert self.noise_ids.size == self.nlc, "Need one noise block id per light curve."
            assert self.noise_ids.max() == self.n_noise_blocks - 1, "Error initialising noise block ids."

        if isscalar(nsamples):
            self.nsamples = full(self.nlc, nsamples)
            self.exptimes = full(self.nlc, exptimes)
        else:
            assert (len(nsamples) == self.nlc) and (len(exptimes) == self.nlc)
            self.nsamples = asarray(nsamples, 'int')
            self.exptimes = asarray(exptimes)

        self.tm.set_data(self.timea, self.lcids, self.pbids, self.nsamples, self.exptimes)

        if errors is None:
            self.errors = array([full(t.size, nan) for t in self.times])
        else:
            self.errors = asarray(errors)
        self.errora = concatenate(self.errors)

        # Initialise the light curves slices
        # ----------------------------------
        self.lcslices = []
        sstart = 0
        for i in range(self.nlc):
            s = self.times[i].size
            self.lcslices.append(s_[sstart:sstart + s])
            sstart += s

        # Initialise the covariate arrays, if given
        # -----------------------------------------
        if covariates is not None:
            # Normalise each covariate array; a plain `for cv in ...` loop would
            # only rebind the loop variable and leave the list unchanged.
            self.covariates = [(cv - cv.mean(0)) / cv.std(0) for cv in covariates]
            #self.ncovs = self.covariates[0].shape[1]
            #self.covsize = array([c.size for c in self.covariates])
            #self.covstart = concatenate([[0], self.covsize.cumsum()[:-1]])
            #self.cova = concatenate(self.covariates)

    def print_parameters(self, columns: int = 2):
        columns = max(1, columns)
        for i, p in enumerate(self.ps):
            print(p.__repr__(), end=('\n' if i % columns == columns - 1 else '\t'))

    def _init_parameters(self):
        self.ps = ParameterSet()
        self._init_p_orbit()
        self._init_p_planet()
        self._init_p_limb_darkening()
        self._init_p_baseline()
        self._init_p_noise()
        self.ps.freeze()

    def _init_p_orbit(self):
        """Orbit parameter initialisation.
        """
        porbit = [
            GParameter('tc',  'zero_epoch',       'd',      N(0.0,  0.1), (-inf, inf)),
            GParameter('p',   'period',           'd',      N(1.0, 1e-5), (0,    inf)),
            GParameter('rho', 'stellar_density',  'g/cm^3', U(0.1, 25.0), (0,    inf)),
            GParameter('b',   'impact_parameter', 'R_s',    U(0.0,  1.0), (0,      1))]
        self.ps.add_global_block('orbit', porbit)

    def _init_p_planet(self):
        """Planet parameter initialisation.
        """
        pk2 = [PParameter('k2', 'area_ratio', 'A_s', GM(0.1), (0.01**2, 0.75**2))]
        self.ps.add_passband_block('k2', 1, 1, pk2)
        self._pid_k2 = repeat(self.ps.blocks[-1].start, self.npb)
        self._start_k2 = self.ps.blocks[-1].start
        self._sl_k2 = self.ps.blocks[-1].slice

    def _init_p_limb_darkening(self):
        """Limb darkening parameter initialisation.
        """
        pld = concatenate([
            [PParameter('q1_{:d}'.format(i), 'q1_coefficient', '', U(0, 1), bounds=(0, 1)),
             PParameter('q2_{:d}'.format(i), 'q2_coefficient', '', U(0, 1), bounds=(0, 1))]
            for i in range(self.npb)])
        self.ps.add_passband_block('ldc', 2, self.npb, pld)
        self._sl_ld = self.ps.blocks[-1].slice
        self._start_ld = self.ps.blocks[-1].start

    def _init_p_baseline(self):
        """Baseline parameter initialisation.
        """
        self._sl_bl = None

    def _init_p_noise(self):
        """Noise parameter initialisation.
        """
        pns = [LParameter('loge_{:d}'.format(i), 'log10_error_{:d}'.format(i), '', U(-4, 0), bounds=(-4, 0)) for i in range(self.n_noise_blocks)]
        self.ps.add_lightcurve_block('log_err', 1, self.n_noise_blocks, pns)
        self._sl_err = self.ps.blocks[-1].slice
        self._start_err = self.ps.blocks[-1].start

    def _init_instrument(self):
        pass

    def _pre_initialisation(self):
        pass

    def _post_initialisation(self):
        pass

    def create_pv_population(self, npop=50):
        pvp = self.ps.sample_from_prior(npop)
        for sl in self.ps.blocks[1].slices:
            pvp[:,sl] = uniform(0.01**2, 0.25**2, size=(npop, 1))

        # With LDTk
        # ---------
        #
        # Use LDTk to create the sample if LDTk has been initialised.
        #
        if self.ldps:
            istart = self._start_ld
            cms, ces = self.ldps.coeffs_tq()
            for i, (cm, ce) in enumerate(zip(cms.flat, ces.flat)):
                pvp[:, i + istart] = normal(cm, ce, size=pvp.shape[0])

        # No LDTk
        # -------
        #
        # Ensure that the total limb darkening decreases towards
        # red passbands.
        #
        else:
            ldsl = self._sl_ld
            for i in range(pvp.shape[0]):
                pid = argsort(pvp[i, ldsl][::2])[::-1]
                pvp[i, ldsl][::2] = pvp[i, ldsl][::2][pid]
                pvp[i, ldsl][1::2] = pvp[i, ldsl][1::2][pid]

        # Estimate white noise from the data
        # ----------------------------------
        for i in range(self.nlc):
            wn = diff(self.ofluxa).std() / sqrt(2)
            pvp[:, self._start_err] = log10(uniform(0.5*wn, 2*wn, size=npop))
        return pvp

    def baseline(self, pv):
        """Multiplicative baseline"""
        return 1.

    def trends(self, pv):
        """Additive trends"""
        return 0.

    def transit_model(self, pv, copy=True):
        pv = atleast_2d(pv)
        pvp = map_pv(pv)
        ldc = map_ldc(pv[:,self._sl_ld])
        flux = self.tm.evaluate_pv(pvp, ldc, copy)
        return flux

    def flux_model(self, pv):
        baseline    = self.baseline(pv)
        trends      = self.trends(pv)
        model_flux = self.transit_model(pv)
        return baseline * model_flux + trends

    def residuals(self, pv):
        return self.ofluxa - self.flux_model(pv)

    def set_prior(self, parameter, prior, *nargs) -> None:
        if isinstance(parameter, str):
            descriptions = self.ps.descriptions
            names = self.ps.names
            if parameter in descriptions:
                parameter = descriptions.index(parameter)
            elif parameter in names:
                parameter = names.index(parameter)
            else:
                params = ', '.join([f"{ln} ({sn})" for ln, sn in zip(self.ps.descriptions, self.ps.names)])
                raise ValueError(f'Parameter "{parameter}" not found from the parameter set: {params}')

        if isinstance(prior, str):
            if prior.lower() in ['n', 'np', 'normal']:
                prior = N(nargs[0], nargs[1])
            elif prior.lower() in ['u', 'up', 'uniform']:
                prior = U(nargs[0], nargs[1])
            else:
                raise ValueError(f'Unknown prior "{prior}". Allowed values are (N)ormal and (U)niform.')

        self.ps[parameter].prior = prior

    def set_radius_ratio_prior(self, kmin, kmax):
        for p in self.ps[self._sl_k2]:
            p.prior = U(kmin ** 2, kmax ** 2)
            p.bounds = [kmin ** 2, kmax ** 2]
        self.ps.thaw()
        self.ps.freeze()

    def add_t14_prior(self, mean: float, std: float) -> None:
        """Add a normal prior on the transit duration.

        Parameters
        ----------
        mean: float
            Mean of the normal distribution
        std: float
            Standard deviation of the normal distribution.
        """

        def T14(pv):
            pv = atleast_2d(pv)
            a = as_from_rhop(pv[:, 2], pv[:, 1])
            t14 = duration_eccentric(pv[:, 1], sqrt(pv[:, 4]), a, arccos(pv[:, 3] / a), 0, 0, 1)
            return norm.logpdf(t14, mean, std)

        self.lnpriors.append(T14)

    def add_as_prior(self, mean: float, std: float) -> None:
        """Add a normal prior on the scaled semi-major axis :math:`(a / R_\star)`.

        Parameters
        ----------
        mean: float
            Mean of the normal distribution.
        std: float
            Standard deviation of the normal distribution
        """
        def as_prior(pv):
            a = as_from_rhop(pv[2], pv[1])
            return norm.logpdf(a, mean, std)
        self.lnpriors.append(as_prior)

    def add_ldtk_prior(self, teff: tuple, logg: tuple, z: tuple, passbands: tuple,
                       uncertainty_multiplier: float = 3, **kwargs) -> None:
        """Add a LDTk-based prior on the limb darkening.

        Parameters
        ----------
        teff
        logg
        z
        passbands
        uncertainty_multiplier

        Returns
        -------

        """
        if 'pbs' in kwargs.keys():
            raise DeprecationWarning("The 'pbs' argument has been renamed to 'passbands'")

        if isinstance(passbands[0], str):
            raise DeprecationWarning(
                'Passing passbands by name has been deprecated, they should be now Filter instances.')

        self.ldsc = LDPSetCreator(teff, logg, z, list(passbands))
        self.ldps = self.ldsc.create_profiles(1000)
        self.ldps.resample_linear_z()
        self.ldps.set_uncertainty_multiplier(uncertainty_multiplier)

        def ldprior(pv):
            return self.ldps.lnlike_tq(pv[:, self._sl_ld].reshape([pv.shape[0], -1, 2]))

        self.lnpriors.append(ldprior)


    def remove_outliers(self, sigma=5):
        fmodel = squeeze(self.flux_model(self.de.minimum_location))
        covariates = [] if self.covariates is not None else None
        times, fluxes, lcids, errors = [], [], [], []
        for i in range(len(self.times)):
            res = self.fluxes[i] - fmodel[self.lcslices[i]]
            mask = ~sigma_clip(res, sigma=sigma).mask
            times.append(self.times[i][mask])
            fluxes.append(self.fluxes[i][mask])
            if covariates is not None:
                covariates.append(self.covariates[i][mask])
            if self.errors is not None:
                errors.append(self.errors[i][mask])

        self._init_data(times=times, fluxes=fluxes, covariates=self.covariates, pbids=self.pbids,
                        errors=(errors if self.errors is not None else None), wnids=self.noise_ids,
                        nsamples=self.nsamples, exptimes=self.exptimes)


    def remove_transits(self, tids):
        m = ones(len(self.times), bool)
        m[tids] = False
        # times and fluxes are Python lists, so a boolean mask cannot index
        # them directly; filter element-wise instead.
        keep = lambda v: [x for x, k in zip(v, m) if k] if isinstance(v, list) else v[m]
        self._init_data(keep(self.times), keep(self.fluxes), self.pbids[m],
                        keep(self.covariates) if self.covariates is not None else None,
                        keep(self.errors), self.noise_ids[m], self.nsamples[m], self.exptimes[m])
        self._init_parameters()

    def lnprior(self, pv: ndarray) -> Union[Iterable,float]:
        """Log prior density for a 1D or 2D array of model parameters.

        Parameters
        ----------
        pv: ndarray
            Either a 1D parameter vector or a 2D parameter array.

        Returns
        -------
            Log prior density for the given parameter vector(s).
        """
        return self.ps.lnprior(pv) + self.additional_priors(pv)

    def additional_priors(self, pv):
        """Additional priors."""
        pv = atleast_2d(pv)
        return sum([f(pv) for f in self.lnpriors], 0)

    def lnlikelihood(self, pv):
        """Log likelihood for a 1D or 2D array of model parameters.

        Parameters
        ----------
        pv: ndarray
            Either a 1D parameter vector or a 2D parameter array.

        Returns
        -------
            Log likelihood for the given parameter vector(s).
        """
        flux_m = self.flux_model(pv)
        wn = 10**(atleast_2d(pv)[:,self._sl_err])
        return lnlike_normal_v(self.ofluxa, flux_m, wn, self.noise_ids, self.lcids)

    def lnposterior(self, pv):
        lnp = self.lnprior(pv) + self.lnlikelihood(pv)
        return where(isfinite(lnp), lnp, -inf)

    def __call__(self, pv):
        return self.lnposterior(pv)

    def optimize_global(self, niter=200, npop=50, population=None, label='Global optimisation', leave=False,
                        plot_convergence: bool = True, use_tqdm: bool = True):

        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop, maximize=True, vectorize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:,:] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave, disable=(not use_tqdm)):
            pass

        if plot_convergence:
            fig, axs = subplots(1, 5, figsize=(13, 2), constrained_layout=True)
            rfit = self.de._fitness
            mfit = isfinite(rfit)

            if hasattr(self, '_old_de_fitness'):
                m = isfinite(self._old_de_fitness)
                axs[0].hist(-self._old_de_fitness[m], facecolor='midnightblue', bins=25, alpha=0.25)
            axs[0].hist(-rfit[mfit], facecolor='midnightblue', bins=25)

            for i, ax in zip([0, 2, 3, 4], axs[1:]):
                if hasattr(self, '_old_de_fitness'):
                    m = isfinite(self._old_de_fitness)
                    ax.plot(self._old_de_population[m, i], -self._old_de_fitness[m], 'kx', alpha=0.25)
                ax.plot(self.de.population[mfit, i], -rfit[mfit], 'k.')
                ax.set_xlabel(self.ps.descriptions[i])
            setp(axs, yticks=[])
            setp(axs[1], ylabel='Log posterior')
            setp(axs[0], xlabel='Log posterior')
            sb.despine(fig, offset=5)
        self._old_de_population = self.de.population.copy()
        self._old_de_fitness = self.de._fitness.copy()

    def optimize_local(self, pv0=None, method='powell'):
        if pv0 is None:
            if self.de is not None:
                pv0 = self.de.minimum_location
            else:
                pv0 = self.ps.mean_pv
                pv0[self._sl_err] = log10(self.wn)
        res = minimize(lambda pv: -self.lnposterior(pv), pv0, method=method)
        self._local_minimization = res

    def sample_mcmc(self, niter: int = 500, thin: int = 5, repeats: int = 1, npop: int = None, population=None,
                    label='MCMC sampling', reset=True, leave=True, save=False, use_tqdm: bool = True):

        if save and self.result_dir is None:
            raise ValueError('The MCMC sampler is set to save the results, but the result directory is not set.')

        if self.sampler is None:
            if population is not None:
                pop0 = population
            elif hasattr(self, '_local_minimization') and self._local_minimization is not None:
                pop0 = multivariate_normal(self._local_minimization.x, diag(full(len(self.ps), 0.001 ** 2)), size=npop)
            elif self.de is not None:
                pop0 = self.de.population.copy()
            else:
                raise ValueError('Sample MCMC needs an initial population.')
            self.sampler = EnsembleSampler(pop0.shape[0], pop0.shape[1], self.lnposterior, vectorize=True)
        else:
            pop0 = self.sampler.chain[:,-1,:].copy()

        for i in tqdm(range(repeats), desc='MCMC sampling', disable=(not use_tqdm)):
            if reset or i > 0:
                self.sampler.reset()
            for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter,
                          desc='Run {:d}/{:d}'.format(i+1, repeats), leave=False, disable=(not use_tqdm)):
                pass
            if save:
                self.save(self.result_dir)
            pop0 = self.sampler.chain[:,-1,:].copy()

    def posterior_samples(self, burn: int = 0, thin: int = 1, derived_parameters: bool = True):
        fc = self.sampler.chain[:, burn::thin, :].reshape([-1, self.de.n_par])
        df = pd.DataFrame(fc, columns=self.ps.names)
        if derived_parameters:
            for k2c in df.columns[self._sl_k2]:
                df[k2c.replace('k2', 'k')] = sqrt(df[k2c])
            df['a'] = as_from_rhop(df.rho.values, df.p.values)
            df['inc'] = i_from_baew(df.b.values, df.a.values, 0., 0.)

            average_ks = sqrt(df.iloc[:, self._sl_k2]).mean(1).values
            df['t14'] = d_from_pkaiews(df.p.values, average_ks, df.a.values, df.inc.values, 0., 0., 1)
        return df

    def plot_mcmc_chains(self, pid: int=0, alpha: float=0.1, thin: int=1, ax=None):
        fig, ax = (None, ax) if ax is not None else subplots()
        ax.plot(self.sampler.chain[:, ::thin, pid].T, 'k', alpha=alpha)
        if fig is not None:
            fig.tight_layout()
        return fig

    def save(self, save_path: Path = '.'):
        save_path = Path(save_path)

        if self.de:
            de = xa.DataArray(self.de.population, dims='pvector name'.split(), coords={'name': self.ps.names})
        else:
            de = None

        if self.sampler is not None:
            mc = xa.DataArray(self.sampler.chain, dims='pvector step name'.split(),
                              coords={'name': self.ps.names}, attrs={'ndim': self.de.n_par, 'npop': self.de.n_pop})
        else:
            mc = None

        ds = xa.Dataset(data_vars={'de_population_lm': de, 'lm_mcmc': mc},
                        attrs={'created': strftime('%Y-%m-%d %H:%M:%S'), 'target': self.name})
        ds.to_netcdf(save_path.joinpath(f'{self.name}.nc'))

    def plot_light_curves(self, method='de', ncol: int = 3, width: float = 2., max_samples: int = 1000, figsize=None,
                          data_alpha=0.5, ylim=None):
        nrow = int(ceil(self.nlc / ncol))
        if method == 'mcmc':
            df = self.posterior_samples(derived_parameters=False)
            t0, p = df.tc.median(), df.p.median()
            fmodel = self.flux_model(permutation(df.values)[:max_samples])
            fmperc = percentile(fmodel, [50, 16, 84, 2.5, 97.5], 0)
        else:
            fmodel = squeeze(self.flux_model(self.de.minimum_location))
            t0, p = self.de.minimum_location[0], self.de.minimum_location[1]
            fmperc = None

        fig, axs = subplots(nrow, ncol, figsize=figsize, constrained_layout=True, sharey='all', sharex='all',
                            squeeze=False)
        for i in range(self.nlc):
            ax = axs.flat[i]
            e = epoch(self.times[i].mean(), t0, p)
            tc = t0 + e * p
            time = self.times[i] - tc

            ax.plot(time, self.fluxes[i], '.', alpha=data_alpha)

            if method == 'de':
                ax.plot(time, fmodel[self.lcslices[i]], 'w', lw=4)
                ax.plot(time, fmodel[self.lcslices[i]], 'k', lw=1)
            else:
                ax.fill_between(time, *fmperc[3:5, self.lcslices[i]], alpha=0.15)
                ax.fill_between(time, *fmperc[1:3, self.lcslices[i]], alpha=0.25)
                ax.plot(time, fmperc[0, self.lcslices[i]])

            setp(ax, xlabel=f'Time - T$_c$ [d]', xlim=(-width / 2 / 24, width / 2 / 24))
        setp(axs[:, 0], ylabel='Normalised flux')

        if ylim is not None:
            setp(axs, ylim=ylim)

        for ax in axs.flat[self.nlc:]:
            ax.remove()
        return fig

    def __repr__(self):
        return f"Target: {self.name}\nLPF: {self._lpf_name}\n Passbands: {self.passbands}"
Example 8
def model_galaxy_mcmc(model_file,
                      output_name=None,
                      write_fits=default_filetypes,
                      iterations=0,
                      burn=0,
                      chains=None,
                      max_iterations=1,
                      convergence_check=check_convergence_autocorr):
    """
    Model the surface brightness distribution of a galaxy or galaxies using
    multi-component Markov Chain Monte Carlo parameter estimation.

    :param model_file: Filename of the model definition file. This should be
        a series of components from psfMC.ModelComponents, with parameters
        supplied as either fixed values or stochastics from psfMC.distributions
    :param output_name: Base name for output files (no file extension). By
        default, files are written out containing the requested image types
        (write_fits param) and the MCMC trace database. If None, use
        out_<model_filename>
    :param write_fits: List of which fits file types to write. By default, raw
        (unconvolved) model, convolved model, model IVM, residual, and point
        sources subtracted.
    :param iterations: Number of retained MCMC samples
    :param burn: Number of discarded (burn-in) MCMC samples
    :param chains: Number of individual chains (walkers) to run. If None, the
        minimum number recommended by emcee will be used. More is better.
    :param max_iterations: Maximum sampler iterations before convergence is
        enforced. Default is 1, which means sampler halts even if not converged.
    :param convergence_check: Function taking an emcee Sampler as argument, and
        returning True or False based on whether the sampler has converged.
        Default function returns True when the autocorrelation time of all
        stochastic variables is < 10% of the total number of samples. Sampling
        will be repeated (increasing the chain length) until convergence check
        is met or until max_iterations iterations are performed.
    """
    if output_name is None:
        output_name = 'out_' + model_file.replace('.py', '')
    output_name += '_{}'

    mc_model = MultiComponentModel(components=model_file)

    # If chains is not specified, use the minimum number recommended by emcee
    if chains is None:
        chains = 2 * mc_model.num_params + 2

    # FIXME: can't use threads=n right now because model object can't be pickled
    sampler = EnsembleSampler(nwalkers=chains,
                              dim=mc_model.num_params,
                              lnpostfn=mc_model.log_posterior,
                              kwargs={'model': mc_model})

    # Open database if it exists, otherwise pass backend to create a new one
    db_name = output_name.format('db') + '.fits'

    # TODO: Resume if database exists
    if not os.path.exists(db_name):
        param_vec = mc_model.init_params_from_priors(chains)

        # Run burn-in and discard
        for step, result in enumerate(
                sampler.sample(param_vec, iterations=burn)):
            # Set new initial sampler state
            param_vec = result[0]
            # No need to retain images from every step, so clear blobs
            sampler.clear_blobs()
            print_progress(step, burn, 'Burning')

        sampler.reset()

        converged = False
        for sampling_iter in range(max_iterations):
            # Now run real samples and retain
            for step, result in enumerate(
                    sampler.sample(param_vec, iterations=iterations)):
                mc_model.accumulate_images(result[3])
                # No need to retain images from every step, so clear blobs
                sampler.clear_blobs()
                print_progress(step, iterations, 'Sampling')

            if convergence_check(sampler):
                converged = True
                break
            else:
                warn('Not yet converged after {:d} iterations:'.format(
                    (sampling_iter + 1) * iterations))
                convergence_check(sampler, verbose=1)

        # Collect some metadata about the sampling process. These will be saved
        # in the FITS headers of both the output database and the images
        db_metadata = OrderedDict([('MCITER', sampler.chain.shape[1]),
                                   ('MCBURN', burn), ('MCCHAINS', chains),
                                   ('MCCONVRG', converged),
                                   ('MCACCEPT',
                                    sampler.acceptance_fraction.mean())])
        database = save_database(sampler,
                                 mc_model,
                                 db_name,
                                 meta_dict=db_metadata)
    else:
        print('Database already contains sampled chains, skipping sampling')
        database = load_database(db_name)

    # Write model output files
    save_posterior_images(mc_model,
                          database,
                          output_name=output_name,
                          filetypes=write_fits)
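
A hypothetical invocation of the routine above; mymodel.py would hold the psfMC component definitions:

model_galaxy_mcmc('mymodel.py', iterations=2000, burn=1000,
                  chains=None, max_iterations=3)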
Example 9
class LogPosteriorFunction:
    _lpf_name = 'LogPosteriorFunction'

    def __init__(self, name: str, result_dir: Union[Path, str] = '.'):
        """The Log Posterior Function class.

        Parameters
        ----------
        name: str
            Name of the log posterior function instance.
        """
        self.name = name
        self.result_dir = Path(result_dir if result_dir is not None else '.')

        # Declare high-level objects
        # --------------------------
        self.ps = None  # Parametrisation
        self.de = None  # Differential evolution optimiser
        self.sampler = None  # MCMC sampler
        self._local_minimization = None

        # Initialise the additional lnprior list
        # --------------------------------------
        self._additional_log_priors = []

        self._old_de_fitness = None
        self._old_de_population = None

    def print_parameters(self, columns: int = 2):
        columns = max(1, columns)
        for i, p in enumerate(self.ps):
            print(p.__repr__(), end=('\n' if i % columns == columns - 1 else '\t'))

    def _init_parameters(self):
        self.ps = ParameterSet()
        self.ps.freeze()

    def create_pv_population(self, npop=50):
        return self.ps.sample_from_prior(npop)

    def set_prior(self, parameter, prior, *nargs) -> None:
        if isinstance(parameter, str):
            descriptions = self.ps.descriptions
            names = self.ps.names
            if parameter in descriptions:
                parameter = descriptions.index(parameter)
            elif parameter in names:
                parameter = names.index(parameter)
            else:
                params = ', '.join([f"{ln} ({sn})" for ln, sn in zip(self.ps.descriptions, self.ps.names)])
                raise ValueError(f'Parameter "{parameter}" not found from the parameter set: {params}')

        if isinstance(prior, str):
            if prior.lower() in ['n', 'np', 'normal']:
                prior = NP(nargs[0], nargs[1])
            elif prior.lower() in ['u', 'up', 'uniform']:
                prior = UP(nargs[0], nargs[1])
            else:
                raise ValueError(f'Unknown prior "{prior}". Allowed values are (N)ormal and (U)niform.')

        self.ps[parameter].prior = prior

    def lnprior(self, pv: ndarray) -> Union[Iterable, float]:
        """Log prior density for a 1D or 2D array of model parameters.

        Parameters
        ----------
        pv: ndarray
            Either a 1D parameter vector or a 2D parameter array.

        Returns
        -------
            Log prior density for the given parameter vector(s).
        """
        return self.ps.lnprior(pv) + self.additional_priors(pv)

    def additional_priors(self, pv):
        pv = atleast_2d(pv)
        return sum([f(pv) for f in self._additional_log_priors], 0)

    def lnlikelihood(self, pv):
        raise NotImplementedError

    def lnposterior(self, pv):
        lnp = self.lnprior(pv) + self.lnlikelihood(pv)
        return where(isfinite(lnp), lnp, -inf)

    def __call__(self, pv):
        return self.lnposterior(pv)

    def optimize_local(self, pv0=None, method='powell'):
        if pv0 is None:
            if self.de is not None:
                pv0 = self.de.minimum_location
            else:
                pv0 = self.ps.mean_pv
        res = minimize(lambda pv: -self.lnposterior(pv), pv0, method=method)
        self._local_minimization = res

    def optimize_global(self, niter=200, npop=50, population=None, pool=None, lnpost=None, vectorize=True,
                        label='Global optimisation', leave=False, plot_convergence: bool = True, use_tqdm: bool = True,
                        plot_parameters: tuple = (0, 2, 3, 4)):

        lnpost = lnpost or self.lnposterior
        if self.de is None:
            self.de = DiffEvol(lnpost, clip(self.ps.bounds, -1, 1), npop, maximize=True, vectorize=vectorize, pool=pool)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave, disable=(not use_tqdm)):
            pass

        if plot_convergence:
            fig, axs = subplots(1, 1 + len(plot_parameters), figsize=(13, 2), constrained_layout=True)
            rfit = self.de._fitness
            mfit = isfinite(rfit)

            if self._old_de_fitness is not None:
                m = isfinite(self._old_de_fitness)
                axs[0].hist(-self._old_de_fitness[m], facecolor='midnightblue', bins=25, alpha=0.25)
            axs[0].hist(-rfit[mfit], facecolor='midnightblue', bins=25)

            for i, ax in zip(plot_parameters, axs[1:]):
                if self._old_de_fitness is not None:
                    m = isfinite(self._old_de_fitness)
                    ax.plot(self._old_de_population[m, i], -self._old_de_fitness[m], 'kx', alpha=0.25)
                ax.plot(self.de.population[mfit, i], -rfit[mfit], 'k.')
                ax.set_xlabel(self.ps.descriptions[i])
            setp(axs, yticks=[])
            setp(axs[1], ylabel='Log posterior')
            setp(axs[0], xlabel='Log posterior')
            sb.despine(fig, offset=5)
        self._old_de_population = self.de.population.copy()
        self._old_de_fitness = self.de._fitness.copy()

    def sample_mcmc(self, niter: int = 500, thin: int = 5, repeats: int = 1, npop: int = None, population=None,
                    label='MCMC sampling', reset=True, leave=True, save=False, use_tqdm: bool = True, pool=None,
                    lnpost=None, vectorize: bool = True):

        if save and self.result_dir is None:
            raise ValueError('The MCMC sampler is set to save the results, but the result directory is not set.')

        lnpost = lnpost or self.lnposterior
        if self.sampler is None:
            if population is not None:
                pop0 = population
            elif hasattr(self, '_local_minimization') and self._local_minimization is not None:
                pop0 = multivariate_normal(self._local_minimization.x, diag(full(len(self.ps), 0.001 ** 2)), size=npop)
            elif self.de is not None:
                pop0 = self.de.population.copy()
            else:
                raise ValueError('Sample MCMC needs an initial population.')
            self.sampler = EnsembleSampler(pop0.shape[0], pop0.shape[1], lnpost, vectorize=vectorize, pool=pool)
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()

        for i in tqdm(range(repeats), desc=label, disable=(not use_tqdm), leave=leave):
            if reset or i > 0:
                self.sampler.reset()
            for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter,
                          desc='Run {:d}/{:d}'.format(i + 1, repeats), leave=False, disable=(not use_tqdm)):
                pass
            if save:
                self.save(self.result_dir)
            pop0 = self.sampler.chain[:, -1, :].copy()

    def posterior_samples(self, burn: int = 0, thin: int = 1):
        fc = self.sampler.chain[:, burn::thin, :].reshape([-1, len(self.ps)])
        df = pd.DataFrame(fc, columns=self.ps.names)
        return df

    def plot_mcmc_chains(self, pid: int = 0, alpha: float = 0.1, thin: int = 1, ax=None):
        fig, ax = (None, ax) if ax is not None else subplots()
        ax.plot(self.sampler.chain[:, ::thin, pid].T, 'k', alpha=alpha)
        if fig is not None:
            fig.tight_layout()
        return fig

    def save(self, save_path: Path = '.'):
        save_path = Path(save_path)
        npar = len(self.ps)

        if self.de:
            de = xa.DataArray(self.de.population, dims='pvector parameter'.split(), coords={'parameter': self.ps.names})
        else:
            de = None

        if self.sampler is not None:
            mc = xa.DataArray(self.sampler.chain, dims='pvector step parameter'.split(),
                              coords={'parameter': self.ps.names}, attrs={'ndim': npar, 'npop': self.sampler.nwalkers})
        else:
            mc = None

        ds = xa.Dataset(data_vars={'de_population': de, 'mcmc_samples': mc},
                        attrs={'created': strftime('%Y-%m-%d %H:%M:%S'), 'name': self.name})
        ds.to_netcdf(save_path.joinpath(f'{self.name}.nc'))

        try:
            if self.sampler is not None:
                fname = save_path / f'{self.name}.fits'
                chains = self.sampler.chain
                nchains = chains.shape[0]
                nsteps = chains.shape[1]
                idch = repeat(arange(nchains), nsteps)
                idst = tile(arange(nsteps), nchains)
                flc = chains.reshape([-1, chains.shape[2]])
                tb1 = Table([idch, idst], names=['chain', 'step'])
                tb1.add_columns(flc.T, names=self.ps.names)
                tb2 = Table([idch, idst], names=['chain', 'step'])
                tb2.add_column(self.sampler.lnprobability.ravel(), name='lnp')
                tbhdu1 = pf.BinTableHDU(tb1, name='posterior')
                tbhdu2 = pf.BinTableHDU(tb2, name='sample_stats')
                hdul = pf.HDUList([pf.PrimaryHDU(), tbhdu1, tbhdu2])
                hdul.writeto(fname, overwrite=True)
        except ValueError:
            print('Could not save the samples in fits format.')

    def __repr__(self):
        return f"Target: {self.name}\nLPF: {self._lpf_name}"
Esempio n. 10
0
class BaseLPF:
    _lpf_name = 'base'

    def __init__(self, name: str, passbands: list, times: list = None, fluxes: list = None, errors: list = None,
                 pbids: list = None, covariates: list = None, tm: TransitModel = None,
                 nsamples: tuple = 1, exptimes: tuple = 0.):
        self.tm = tm or QuadraticModel(klims=(0.01, 0.75), nk=512, nz=512)

        # LPF name
        # --------
        self.name = name

        # Passbands
        # ---------
        # Should be arranged from blue to red
        if isinstance(passbands, (list, tuple, ndarray)):
            self.passbands = passbands
        else:
            self.passbands = [passbands]
        self.npb = npb = len(self.passbands)

        self.nsamples = None
        self.exptimes = None

        # Declare high-level objects
        # --------------------------
        self.ps = None          # Parametrisation
        self.de = None          # Differential evolution optimiser
        self.sampler = None     # MCMC sampler
        self.instrument = None  # Instrument
        self.ldsc = None        # Limb darkening set creator
        self.ldps = None        # Limb darkening profile set
        self.cntm = None        # Contamination model

        # Declare data arrays and variables
        # ---------------------------------
        self.nlc: int = 0                # Number of light curves
        self.times: list = None          # List of time arrays
        self.fluxes: list = None         # List of flux arrays
        self.errors: list = None         # List of flux uncertainties
        self.covariates: list = None     # List of covariates
        self.wn: ndarray = None          # Array of white noise estimates for each light curve
        self.timea: ndarray = None       # Array of concatenated times
        self.mfluxa: ndarray = None      # Array of concatenated model fluxes
        self.ofluxa: ndarray = None      # Array of concatenated observed fluxes
        self.errora: ndarray = None      # Array of concatenated flux uncertainties

        self.lcids: ndarray = None       # Array of light curve indices for each datapoint
        self.pbids: ndarray = None       # Array of passband indices for each light curve
        self.lcslices: list = None       # List of light curve slices

        # Set up the observation data
        # ---------------------------
        if times is not None and fluxes is not None and pbids is not None:
            self._init_data(times, fluxes, pbids, covariates, errors, nsamples, exptimes)

        # Setup parametrisation
        # =====================
        self._init_parameters()

        # Initialise the additional lnprior list
        # --------------------------------------
        self.lnpriors = []

        # Initialise the temporary arrays
        # -------------------------------
        self._zpv = zeros(6)
        self._tuv = zeros((npb, 2))
        self._zeros = zeros(npb)
        self._ones = ones(npb)

        # Initialise the instrument
        self._init_instrument()

        if times is not None:
            self._bad_fluxes = [ones_like(t) for t in self.times]
        else:
            self._bad_fluxes = None


    def _init_data(self, times, fluxes, pbids, covariates=None, errors=None, nsamples=1, exptimes=0.):

        if isinstance(times, ndarray) and times.dtype == float:
            times = [times]

        if isinstance(fluxes, ndarray) and fluxes.dtype == float:
            fluxes = [fluxes]

        self.nlc = len(times)
        self.times = asarray(times)
        self.fluxes = asarray(fluxes)
        self.pbids = asarray(pbids)
        self.wn = [diff(f).std() / sqrt(2) for f in fluxes]
        self.timea = concatenate(self.times)
        self.ofluxa = concatenate(self.fluxes)
        self.mfluxa = zeros_like(self.ofluxa)
        self.pbids = atleast_1d(pbids).astype('int')
        self.lcids = concatenate([full(t.size, i) for i, t in enumerate(self.times)])

        if isscalar(nsamples):
            self.nsamples = full(self.nlc, nsamples)
            self.exptimes = full(self.nlc, exptimes)
        else:
            assert (len(nsamples) == self.nlc) and (len(exptimes) == self.nlc)
            self.nsamples = asarray(nsamples, 'int')
            self.exptimes = asarray(exptimes)

        self.tm.set_data(self.timea, self.lcids, self.pbids, self.nsamples, self.exptimes)

        if errors is None:
            self.errors = array([full(t.size, nan) for t in self.times])
        else:
            self.errors = asarray(errors)
        self.errora = concatenate(self.errors)

        # Initialise the light curves slices
        # ----------------------------------
        self.lcslices = []
        sstart = 0
        for i in range(self.nlc):
            s = self.times[i].size
            self.lcslices.append(s_[sstart:sstart + s])
            sstart += s

        # Initialise the covariate arrays, if given
        # -----------------------------------------
        if covariates is not None:
            self.covariates = covariates
            for cv in self.covariates:
                cv[:, 1:] = (cv[:, 1:] - cv[:, 1:].mean(0)) / cv[:, 1:].ptp(0)
            self.ncovs = self.covariates[0].shape[1]
            self.covsize = array([c.size for c in self.covariates])
            self.covstart = concatenate([[0], self.covsize.cumsum()[:-1]])
            self.cova = concatenate(self.covariates)

    def _init_parameters(self):
        self.ps = ParameterSet()
        self._init_p_orbit()
        self._init_p_planet()
        self._init_p_limb_darkening()
        self._init_p_baseline()
        self._init_p_noise()
        self.ps.freeze()

    def _init_p_orbit(self):
        """Orbit parameter initialisation.
        """
        porbit = [
            GParameter('tc',  'zero_epoch',       'd',      N(0.0,  0.1), (-inf, inf)),
            GParameter('pr',  'period',           'd',      N(1.0, 1e-5), (0,    inf)),
            GParameter('rho', 'stellar_density',  'g/cm^3', U(0.1, 25.0), (0,    inf)),
            GParameter('b',   'impact_parameter', 'R_s',    U(0.0,  1.0), (0,      1))]
        self.ps.add_global_block('orbit', porbit)

    def _init_p_planet(self):
        """Planet parameter initialisation.
        """
        pk2 = [PParameter('k2', 'area_ratio', 'A_s', GM(0.1), (0.01**2, 0.55**2))]
        self.ps.add_passband_block('k2', 1, 1, pk2)
        self._pid_k2 = repeat(self.ps.blocks[-1].start, self.npb)
        self._start_k2 = self.ps.blocks[-1].start
        self._sl_k2 = self.ps.blocks[-1].slice

    def _init_p_limb_darkening(self):
        """Limb darkening parameter initialisation.
        """
        pld = concatenate([
            [PParameter('q1_{:d}'.format(i), 'q1_coefficient', '', U(0, 1), bounds=(0, 1)),
             PParameter('q2_{:d}'.format(i), 'q2_coefficient', '', U(0, 1), bounds=(0, 1))]
            for i in range(self.npb)])
        self.ps.add_passband_block('ldc', 2, self.npb, pld)
        self._sl_ld = self.ps.blocks[-1].slice
        self._start_ld = self.ps.blocks[-1].start

    def _init_p_baseline(self):
        """Baseline parameter initialisation.
        """
        pass

    def _init_p_noise(self):
        """Noise parameter initialisation.
        """
        pns = [LParameter('lne_{:d}'.format(i), 'log_error_{:d}'.format(i), '', U(-8, -0), bounds=(-8, -0)) for i in range(self.nlc)]
        self.ps.add_lightcurve_block('log_err', 1, self.nlc, pns)
        self._sl_err = self.ps.blocks[-1].slice
        self._start_err = self.ps.blocks[-1].start

    def _init_instrument(self):
        pass

    def create_pv_population(self, npop=50):
        pvp = self.ps.sample_from_prior(npop)
        for sl in self.ps.blocks[1].slices:
            pvp[:,sl] = uniform(0.01**2, 0.25**2, size=(npop, 1))

        # With LDTk
        # ---------
        #
        # Use LDTk to create the sample if LDTk has been initialised.
        #
        if self.ldps:
            istart = self._start_ld
            cms, ces = self.ldps.coeffs_tq()
            for i, (cm, ce) in enumerate(zip(cms.flat, ces.flat)):
                pvp[:, i + istart] = normal(cm, ce, size=pvp.shape[0])

        # No LDTk
        # -------
        #
        # Ensure that the total limb darkening decreases towards
        # red passbands.
        #
        else:
            ldsl = self._sl_ld
            for i in range(pvp.shape[0]):
                pid = argsort(pvp[i, ldsl][::2])[::-1]
                pvp[i, ldsl][::2] = pvp[i, ldsl][::2][pid]
                pvp[i, ldsl][1::2] = pvp[i, ldsl][1::2][pid]

        # Estimate white noise from the data
        # ----------------------------------
        for i in range(self.nlc):
            wn = diff(self.fluxes[i]).std() / sqrt(2)
            pvp[:, self._start_err + i] = log10(uniform(0.5 * wn, 2 * wn, size=npop))
        return pvp

    def baseline(self, pv):
        """Multiplicative baseline"""
        return 1.

    def trends(self, pv):
        """Additive trends"""
        return 0.

    def transit_model(self, pv):
        pv = atleast_2d(pv)
        pvp = map_pv(pv)
        ldc = map_ldc(pv[:,self._sl_ld])
        flux = self.tm.evaluate_pv(pvp, ldc)
        return flux

    def flux_model(self, pv):
        baseline    = self.baseline(pv)
        trends      = self.trends(pv)
        model_flux = self.transit_model(pv)
        return baseline * model_flux + trends

    def residuals(self, pv):
        return self.ofluxa - self.flux_model(pv)

    def set_prior(self, pid: int, prior) -> None:
        self.ps[pid].prior = prior

    def add_t14_prior(self, mean: float, std: float) -> None:
        """Add a normal prior on the transit duration.

        Parameters
        ----------
        mean
        std

        Returns
        -------

        """
        def T14(pv):
            a = as_from_rhop(pv[2], pv[1])
            t14 = duration_eccentric(pv[1], sqrt(pv[4]), a, mt.acos(pv[3] / a), 0, 0, 1)
            return norm.logpdf(t14, mean, std)
        self.lnpriors.append(T14)

    def add_as_prior(self, mean: float, std: float) -> None:
        """Add a prior on the scaled semi-major axis

        Parameters
        ----------
        mean
        std

        Returns
        -------

        """
        def as_prior(pv):
            a = as_from_rhop(pv[2], pv[1])
            return norm.logpdf(a, mean, std)
        self.lnpriors.append(as_prior)

    def add_ldtk_prior(self, teff: tuple, logg: tuple, z: tuple,
                       uncertainty_multiplier: float = 3,
                       pbs: tuple = ('g', 'r', 'i', 'z')) -> None:
        """Add a LDTk-based prior on the limb darkening.

        Parameters
        ----------
        teff
        logg
        z
        uncertainty_multiplier
        pbs

        Returns
        -------

        """
        fs = {n: f for n, f in zip('g r i z'.split(), (sdss_g, sdss_r, sdss_i, sdss_z))}
        filters = [fs[k] for k in pbs]
        self.ldsc = LDPSetCreator(teff, logg, z, filters)
        self.ldps = self.ldsc.create_profiles(1000)
        self.ldps.resample_linear_z()
        self.ldps.set_uncertainty_multiplier(uncertainty_multiplier)
        def ldprior(pv):
            return self.ldps.lnlike_tq(pv[self._sl_ld])
        self.lnpriors.append(ldprior)

    def remove_outliers(self, sigma=5):
        fmodel = self.flux_model(self.de.minimum_location).ravel()
        times, fluxes, errors = [], [], []
        for i in range(len(self.times)):
            res = self.fluxes[i] - fmodel[self.lcslices[i]]
            mask = ~sigma_clip(res, sigma=sigma).mask
            times.append(self.times[i][mask])
            fluxes.append(self.fluxes[i][mask])
            if self.errors is not None:
                errors.append(self.errors[i][mask])
        self._init_data(times, fluxes, self.pbids, errors=(errors if self.errors is not None else None))

    def remove_transits(self, tids):
        m = ones(len(self.times), bool)
        m[tids] = False
        self._init_data(self.times[m], self.fluxes[m], self.pbids[m],
                        self.covariates[m] if self.covariates is not None else None,
                        self.errors[m], self.nsamples[m], self.exptimes[m])
        self._init_parameters()

    def lnprior(self, pv):
        return self.ps.lnprior(pv) + self.additional_priors(pv)

    def additional_priors(self, pv):
        """Additional priors."""
        pv = atleast_2d(pv)
        return sum([f(pv) for f in self.lnpriors], 0)

    def lnlikelihood(self, pv):
        flux_m = self.flux_model(pv)
        wn = 10**(atleast_2d(pv)[:,self._sl_err])
        return lnlike_normal_v(self.ofluxa, flux_m, wn, self.lcids)

    def lnposterior(self, pv):
        lnp = self.lnprior(pv) + self.lnlikelihood(pv)
        return where(isfinite(lnp), lnp, -inf)

    def __call__(self, pv):
        return self.lnposterior(pv)

    def optimize_global(self, niter=200, npop=50, population=None, label='Global optimisation', leave=False):
        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop, maximize=True, vectorize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:,:] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave):
            pass

    def sample_mcmc(self, niter=500, thin=5, label='MCMC sampling', reset=False, leave=True):
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par, self.lnposterior, vectorize=True)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:,-1,:].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter, desc=label, leave=False):
            pass

    def posterior_samples(self, burn: int=0, thin: int=1, include_ldc: bool=False):
        ldstart = self._sl_ld.start
        fc = self.sampler.chain[:, burn::thin, :].reshape([-1, self.de.n_par])
        d = fc if include_ldc else fc[:, :ldstart]
        n = self.ps.names if include_ldc else self.ps.names[:ldstart]
        return pd.DataFrame(d, columns=n)

    def plot_mcmc_chains(self, pid: int=0, alpha: float=0.1, thin: int=1, ax=None):
        fig, ax = (None, ax) if ax is not None else subplots()
        ax.plot(self.sampler.chain[:, ::thin, pid].T, 'k', alpha=alpha)
        if fig is not None:
            fig.tight_layout()
        return fig


    def __repr__(self):
        s  = f"""Target: {self.name}
  LPF: {self._lpf_name}
  Passbands: {self.passbands}"""
        return s
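# A hedged usage sketch for BaseLPF with synthetic data: t1, t2, f1, f2 are
# placeholder numpy arrays and the prior values are arbitrary.
lpf = BaseLPF('toy', passbands=['g', 'r'],
              times=[t1, t2], fluxes=[f1, f2], pbids=[0, 1])
lpf.add_t14_prior(0.1, 0.01)             # optional normal prior on the duration [d]
lpf.optimize_global(niter=200, npop=50)  # DE optimisation over the prior volume
lpf.sample_mcmc(niter=500, thin=5)       # emcee run seeded from the DE population
df = lpf.posterior_samples(burn=100)     # parameter DataFrame (LD coefficients excluded)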
Esempio n. 11
0
sampler.reset()
end = time()
print("Total runtime for burn-in = {0:.2f} seconds".format(end-start))

outname = "bulge_disk_power_law_chain_aggressive_priors.dat"


sep = "  "
formatter = sep.join("{"+str(i)+":.6f}" for i in range(pos0.shape[-1])) + "  " + "{"+str(pos0.shape[-1])+":.6f}\n"
header = "norm_bulge  bulge_to_disk_size_ratio  alpha_bulge  alpha_disk  scatter  lnprob\n"

start = time()

print("...Running MCMC with {0} chain elements".format(mcmc_num_iteration*nwalkers))
with open(outname, "wb") as f:
    f.write(header)
    for result in sampler.sample(pos0, iterations=mcmc_num_iteration, storechain=False):
        pos, prob, state = result
        for a, b in zip(pos, prob):
            newline = formatter.format(*np.append(a, b))
            f.write(newline)
end = time()
print("Runtime for MCMC = {0:.2f} minutes".format((end-start)/60.))
print("\a\a\a")

chain = Table.read(outname, format='ascii')
print("Successfully loaded chain with {0} elements from disk after completion of MCMC".format(len(chain)))
chain.sort('lnprob')
print("Best fitting model:\n")
print(chain[-1])
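# A small follow-up sketch: posterior medians and 68% intervals can be read
# off the loaded astropy Table (column names follow the header written above).
for name in chain.colnames[:-1]:  # skip the trailing 'lnprob' column
    lo, med, hi = np.percentile(chain[name], [16, 50, 84])
    print("{0}: {1:.4f} (+{2:.4f} / -{3:.4f})".format(name, med, hi - med, med - lo))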
Esempio n. 12
0
class Sampler:
    """
    wrapper of emcee.EnsembleSampler. 
    """
    def __init__(self, lnpost, p0, keys, nwalkers=120):
        self.lnpost = lnpost
        self.sampler = EnsembleSampler(nwalkers,
                                       p0.shape[1],
                                       lnpost,
                                       threads=15)
        self.p0 = p0
        self.p = p0
        self.keys = keys
        self.ndim = len(keys)

    def reset_sampler(self):
        self.sampler.reset()

    def sample(self, n_sample, burnin=False):
        """
        execute mcmc for given iteration steps.
        """
        desc = "burnin" if burnin else "sample"
        iteration = tqdm(self.sampler.sample(self.p, iterations=n_sample),
                         total=n_sample,
                         desc=desc)
        for _ret in iteration:
            self.p = _ret[0] if emcee_major_version == "2" else _ret.coords
            lnposts = _ret[1]
            iteration.set_postfix(
                lnpost_min=f"{np.min(lnposts):.5e}",
                lnpost_max=f"{np.max(lnposts):.5e}",
                lnpost_mean=f"{np.mean(lnposts):.5e}"
            )
        if burnin:
            self.reset_sampler()

    @property
    def df(self):
        """
        convert sampler.chain into pandas.DataFrame for convenience.
        """
        _df = DF(self.sampler.flatchain)
        _df = _df.rename(columns={i: key for i, key in enumerate(self.keys)})
        _df["lnpost"] = self.sampler.flatlnprobability
        return _df

    def save_chain(self, fname):
        self.df.to_pickle(fname)

    def plot_chain(self, kwargs_subplots={}, **kwargs):
        fig, ax = plt.subplots(self.ndim + 1, **kwargs_subplots)
        for i in range(self.ndim):
            ax[i].plot(self.sampler.chain[:, :, i].T,
                       **kwargs)  # [nwalkers,nsample,ndim]
            ax[i].set_ylabel(self.keys[i])
        ax[self.ndim].plot(self.sampler.lnprobability.T,
                           **kwargs)  # [nwalkers, nsample]
        ax[self.ndim].set_ylabel("lnpost")

    def plot_hist(self, **kwargs):
        self.df.hist(**kwargs)

    def map_estimator(self):
        """Return the maximum a posteriori sample."""
        _i = self.df.lnpost.idxmax()
        return self.df.iloc[_i]
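# The wrapper can be exercised end to end with a toy posterior; the Gaussian
# target below is illustrative only.
import numpy as np

def lnpost(p):
    return -0.5 * np.sum(p ** 2)  # isotropic Gaussian log-posterior

ndim, nwalkers = 3, 120
p0 = 0.1 * np.random.randn(nwalkers, ndim)
s = Sampler(lnpost, p0, keys=['a', 'b', 'c'], nwalkers=nwalkers)
s.sample(500, burnin=True)    # burn-in; the chain is reset afterwards
s.sample(2000)                # production run
print(s.map_estimator())      # maximum a posteriori sample
s.save_chain('toy_chain.pkl')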
Esempio n. 13
0
class EmceeEnsembleSampler:

    def __init__(self, params, likelihood, pool=None, it=0):

        # Storage
        if pool:
            self.n_cores = pool.size + 1
        else:
            self.n_cores = 1
        self.likelihood = likelihood
        self.p = params
        self.pre_burn_position = None
        self.post_burn_position = None

        # File for writing progress
        self.prog_fname = (
                os.environ['RESULTS'] + ('/.progress/000_000_%03d.txt' % it)
        )

    def run_sampling(self, pool=None):

        # Take the time at the start of sampling
        self.sample_start_time = dt.now()

        # Respawn the walkers from the final burn-in position
        self.redistribute_walkers()

        # Initialise new sampler for final chain
        self.final_sampler = EnsembleSampler(
            self.p['mcmc']['walkers_initial'] * self.p['mcmc']['walkers_factor'],
            len(self.likelihood.mu), self.likelihood, pool=pool
        )

        # Run the sampler and write progress to file
        for i, a in enumerate(
                self.final_sampler.sample(self.post_burn_position,
                                          iterations=(self.p['mcmc']['final_iterations'] + 10))
        ):
            if check_master(pool):
                with open(self.prog_fname, 'w') as f:
                    f.write(self.write_progress(i, self.p['mcmc']['final_iterations'] + 10,
                                                self.sample_start_time, 'S'))

        # Record the finish time
        self.sample_finish_time = dt.now()

        # Prune the chain to remove dead walkers and drop second burn-in
        self.format_chain()

    def run_burn_in(self, pool=None):

        # Initialise sampler for burn-in
        self.burn_in_sampler = EnsembleSampler(
            self.p['mcmc']['walkers_initial'],
            len(self.likelihood.mu), self.likelihood, pool=pool
        )

        # Record start time
        self.burn_start_time = dt.now()

        # Initialise walkers
        self.walker_init()

        # Run the sampler and write progress to file
        for i, a in enumerate(
                self.burn_in_sampler.sample(self.pre_burn_position,
                                            iterations=self.p['mcmc']['burn_in_iterations'])
        ):

            if check_master(pool):
                with open(self.prog_fname, 'w') as f:
                    f.write(self.write_progress(i, self.p['mcmc']['burn_in_iterations'],
                                                self.burn_start_time, 'B'))

        # Save the chain
        self.burn_chain = self.burn_in_sampler.chain

    def walker_init(self):
        """
        Initialises the positions of the walkers as either
        - Gaussian ball around the max. likelihood (if centre=True)
        - Uniformly distributed across the prior volume (if centre=False)
        """

        # Get features of the likelihood
        means = self.likelihood.mu
        stdev = self.likelihood.widths
        prior = self.likelihood.priors
        n_w = self.p['mcmc']['walkers_initial']

        if self.p['mcmc']['start_walkers'] == 'Centre':
            p0 = [np.clip(np.random.normal(means[i], stdev[i], n_w).astype('float32'), prior[i][0], prior[i][1])
                  for i in range(len(self.likelihood.mu))]
        else:
            p0 = [np.random.uniform(prior[i][0], prior[i][1], n_w).astype('float32')
                  for i in range(len(self.likelihood.mu))]

        self.pre_burn_position = np.transpose(np.array(p0))

    def redistribute_walkers(self):
        """
        Given a chain of walkers, creates {factor} copies of the walkers
        distributed in Gaussian kernels around each point. The original points
        are retained to further speed up burn-in
        """

        # Get the final position
        p0 = self.burn_chain[:, -1, :]

        if self.p['mcmc']['walkers_factor'] > 1:

            # Get the spread (a scaled standard deviation) in each dimension at the final position
            v = np.array(0.1 * p0.std(axis=0))

            # Get factor - 1 coordinates around each point
            p1 = []
            for p in p0:
                p1.append(np.array([np.random.normal(p[i], 0.1 * v[i], self.p['mcmc']['walkers_factor'] - 1)
                                    for i in range(len(v))]).transpose())
            p1 = np.array(p1)

            # Combine the original walker positions with the factor - 1 jittered copies
            self.post_burn_position = np.hstack((p0.reshape(len(p0), 1, self.burn_chain.shape[-1]), p1)).reshape(
                len(self.burn_chain) * self.p['mcmc']['walkers_factor'],
                self.burn_chain.shape[-1]
            ).astype('float32')

        else:

            self.post_burn_position = p0

    def format_chain(self, eps=0.05):

        if self.p['options']['prune']:

            prune_params = []
            for i, model in enumerate(self.p['options']['prune_params']):
                for param in self.p['options']['prune_params'][model]:
                    if i == 0:
                        lens_offset = 0
                    else:
                        lens_offset = len(self.p['model']['active_parameters']['lens'])
                    prune_params.append(self.p['model']['active_parameters'][model].index(param) + lens_offset)

            # Find the clusters via DBSCAN
            dbscan = DBSCAN(eps=eps)
            clusters = dbscan.fit_predict(self.final_sampler.chain[:, :, prune_params].mean(axis=1))

            # Get the walkers belonging to the most populous cluster
            self.prune_list = np.where(clusters == mode(clusters)[0][0])[0]

            # Discard pruned walkers
            self.final_chain = self.final_sampler.chain[self.prune_list, 10:, :]

            # Discard pruned likelihoods
            self.logprob = self.final_sampler.lnprobability[self.prune_list, 10:]

        else:

            self.final_chain = self.final_sampler.chain[:, 10:, :]
            self.logprob = self.final_sampler.lnprobability[:, 10:]

    def stats(self):
        text = """Sampler                {{ sm }}
No. Cores              {{ nc }}
Start Time             {{ st }}
End Time               {{ et }}
Time Taken             {{ tt }}
Acceptance Fraction    {{ af }}
Prune Fraction         {{ pf }}"""

        if self.p['options']['prune']:
            pf = round(1.0 - (float(len(self.prune_list)) /
                              float(self.post_burn_position.shape[0])), 4)
        else:
            pf = 0.0
        af = round(np.mean(self.final_sampler.acceptance_fraction), 4)
        t = Template(text)
        td = str(self.sample_finish_time - self.burn_start_time).split('.')[0]

        return t.render(sm='Emcee', af=af, pf=pf,
                        nc=self.n_cores,
                        st=self.burn_start_time.strftime("%D %H:%M"),
                        et=self.sample_finish_time.strftime("%D %H:%M"),
                        tt=td)

    @staticmethod
    def write_progress(j, n, t0, s, w=20):

        # Fraction of run complete
        frac = float(j + 1) / float(n)

        # Avg. time per iteration so far and time left in seconds
        time_per = float((dt.now() - t0).seconds) / float(j + 1)
        time_lft = float(time_per * (n - (j + 1)))

        # Finish time based on previous
        pred_time_h = floor(time_lft / 3600.0)
        pred_time_m = floor((time_lft % 3600) / 60.0)

        # Write progress string
        prog = (('%5d/%5d |' % (j + 1, n) + '#' * int(frac * w) +
                 ' ' * (w - int(frac * w)) + ('| %3d%% %s' % (int(100 * frac), s)) +
                 (' | %02dh%02dm' % (pred_time_h, pred_time_m))))

        return prog
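# The walker-respawning idea in redistribute_walkers, isolated as a standalone
# sketch; the function name and the 10% jitter scale are illustrative.
import numpy as np

def redistribute(p0, factor, scale=0.1):
    """Return factor * len(p0) walkers: the originals plus jittered Gaussian copies."""
    if factor <= 1:
        return p0
    n, ndim = p0.shape
    v = scale * p0.std(axis=0)  # per-dimension spread at the final position
    copies = p0[:, None, :] + v * np.random.randn(n, factor - 1, ndim)
    # Keep the original positions so the respawned ensemble starts from real samples
    return np.concatenate([p0[:, None, :], copies], axis=1).reshape(n * factor, ndim)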
Esempio n. 14
0
import glob

import numpy as np

from sisters.model import GaussianPriorND, lnpostfn
from sisters.io import load_stars

from emcee import EnsembleSampler

model = GaussianPriorND(dimensions=['age'])

rp = {'nwalkers': 128,
      'niter': 256,
      'nout': 2000, # number of samples to draw from the minesweeper chains
      }
#NSP = No spectral info, SP= with Spectral info
files = glob.glob('../data/hyades/*_NSP.dat') 
star_chains = load_stars(files, **rp)

postkwargs = {'samples': star_chains,
              'model': model}
esampler = EnsembleSampler(rp['nwalkers'], model.ndim, lnpostfn,
                           kwargs=postkwargs)

initial = [0.5, 0.2]
initial = [np.random.normal(loc=i, scale=0.1 * i, size=(rp['nwalkers']))
           for i in initial]
initial = np.array(initial).T

for i, result in enumerate(esampler.sample(initial, iterations=rp['niter'],
                                           storechain=True)):
    if (i % 10) == 0:
        print(i)
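# With storechain=True the samples stay on the sampler, so after the loop they
# can be read directly (emcee 2.x attribute names):
chain = esampler.chain        # shape: (nwalkers, niter, ndim)
flat = esampler.flatchain     # shape: (nwalkers * niter, ndim)
lnp = esampler.lnprobability  # log posterior of every stored sample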
Esempio n. 15
0
class LPFunction(object):
    """A basic log posterior function class.
    """

    def __init__(self, time, flux, nthreads=1):

        # Set up the transit model
        # ------------------------
        self.tm = MA(interpolate=True, klims=(0.08, 0.13), nthr=nthreads)
        self.nthr = nthreads

        # Initialise data
        # ---------------
        self.time = time.copy() if time is not None else array([])
        self.flux_o = flux.copy() if flux is not None else array([])
        self.npt = self.time.size

        # Set the optimiser and the MCMC sampler
        # --------------------------------------
        self.de = None
        self.sampler = None

        # Set up the parametrisation and priors
        # -------------------------------------
        psystem = [
            GParameter('tc', 'zero_epoch', 'd', NP(1.01, 0.02), (-inf, inf)),
            GParameter('pr', 'period', 'd', NP(2.50, 1e-7), (0, inf)),
            GParameter('rho', 'stellar_density', 'g/cm^3', UP(0.90, 2.50), (0.90, 2.5)),
            GParameter('b', 'impact_parameter', 'R_s', UP(0.00, 1.00), (0.00, 1.0)),
            GParameter('k2', 'area_ratio', 'A_s', UP(0.08 ** 2, 0.13 ** 2), (1e-8, inf))]

        pld = [
            PParameter('q1', 'q1_coefficient', '', UP(0, 1), bounds=(0, 1)),
            PParameter('q2', 'q2_coefficient', '', UP(0, 1), bounds=(0, 1))]

        pnoise = [LParameter('es', 'white_noise', '', UP(1e-6, 1e-2), bounds=(1e-6, 1e-2))]
        pbase = [LParameter('bl', 'baseline', '', NP(1.00, 0.001), bounds=(0.8, 1.2))]

        self.ps = ParameterSet()
        self.ps.add_global_block('system', psystem)
        self.ps.add_passband_block('ldc', 2, 1, pld)
        self.ps.add_lightcurve_block('error', 1, 1, pnoise)
        self.ps.add_lightcurve_block('baseline', 1, 1, pbase)
        self.ps.freeze()

    def compute_baseline(self, pv):
        """Constant baseline model"""
        return full_like(self.flux_o, pv[8])

    def compute_transit(self, pv):
        """Transit model"""
        _a = as_from_rhop(pv[2], pv[1])  # Scaled semi-major axis from stellar density and orbital period
        _i = mt.acos(pv[3] / _a)  # Inclination from impact parameter and semi-major axis
        _k = mt.sqrt(pv[4])  # Radius ratio from area ratio

        a, b = mt.sqrt(pv[5]), 2 * pv[6]
        _uv = array([a * b, a * (1. - b)])  # Quadratic limb darkening coefficients

        return self.tm.evaluate(self.time, _k, _uv, pv[0], pv[1], _a, _i)

    def compute_lc_model(self, pv):
        """Combined baseline and transit model"""
        return self.compute_baseline(pv) * self.compute_transit(pv)

    def lnprior(self, pv):
        """Log prior"""
        if any(pv < self.ps.lbounds) or any(pv > self.ps.ubounds):
            return -inf
        else:
            return self.ps.lnprior(pv)

    def lnlikelihood(self, pv):
        """Log likelihood"""
        flux_m = self.compute_lc_model(pv)
        return ll_normal_es(self.flux_o, flux_m, pv[7])

    def lnposterior(self, pv):
        """Log posterior"""
        lnprior = self.lnprior(pv)
        if isinf(lnprior):
            return lnprior
        else:
            return lnprior + self.lnlikelihood(pv)

    def create_pv_population(self, npop=50):
        return self.ps.sample_from_prior(npop)

    def optimize(self, niter=200, npop=50, population=None, label='Optimisation'):
        """Global optimisation using Differential evolution"""
        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop, maximize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label):
            pass

    def sample(self, niter=500, thin=5, label='MCMC sampling', reset=False):
        """MCMC sampling using emcee"""
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par, self.lnposterior)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter, desc=label):
            pass
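# A usage sketch, assuming an observed light curve in the placeholder arrays
# 'time' and 'flux':
lpf = LPFunction(time, flux, nthreads=4)
lpf.optimize(niter=300, npop=60)  # differential evolution over the priors
lpf.sample(niter=1000, thin=10)   # emcee, started from the DE population
chain = lpf.sampler.chain         # shape: (npop, nsamples, npar)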
Esempio n. 16
0
class Sampler:
    """
    wrapper of emcee.EnsembleSampler. 
    """
    def __init__(self, lnpost, p0, nwalkers=120, blobs_dtype=float):
        """
        init
        """

        self.lnpost = lnpost
        # Note: blobs_dtype must be specified, otherwise emcee raises an error.
        # If a structured dtype is used (e.g. [("lnlike", float)]), it must be
        # a list of tuples, not a tuple of tuples.
        self.sampler = EnsembleSampler(
            nwalkers, p0.shape[1], lnpost, blobs_dtype=blobs_dtype
        )
        self.p0 = p0
        self.p_last = p0
        self.ndim = p0.shape[1]

    def reset_sampler(self):
        self.sampler.reset()

    def sample(self, n_sample, burnin=False, use_pool=False):
        """
        execute mcmc for given iteration steps.
        """
        desc = "burnin" if burnin else "sample"

        with Pool() as pool:
            self.sampler.pool = pool if use_pool else None
            iteration = tqdm(self.sampler.sample(self.p_last,
                                                 iterations=n_sample),
                             total=n_sample,
                             desc=desc)
            for _ret in iteration:
                self.p_last = _ret.coords  # if uses_emcee3 else _ret[0]  # for emcee2
                lnposts = _ret.log_prob  # if uses_emcee3 else _ret[1]  # for emcee2
                iteration.set_postfix(lnpost_min=np.min(lnposts),
                                      lnpost_max=np.max(lnposts),
                                      lnpost_mean=np.mean(lnposts))
            if burnin:
                self.reset_sampler()

    def get_chain(self, **kwargs):
        return self.sampler.get_chain(**kwargs)

    def get_log_prob(self, **kwargs):
        return self.sampler.get_log_prob(**kwargs)

    def get_blobs(self, **kwargs):
        return self.sampler.get_blobs(**kwargs)

    def get_last_sample(self, **kwargs):
        return self.sampler.get_last_sample(**kwargs)

    def _save(self, fname_base):
        np.save(fname_base + "_chain.npy", self.get_chain())
        np.save(fname_base + "_lnprob.npy", self.get_log_prob())
        np.save(fname_base + "_lnlike.npy", self.get_blobs())

    def save(self, fname_base):
        '''
        Save MCMC results into "<fname_base>_chain/lnprob/lnlike.npy".
        If fname_base is like "your_directory/your_prefix", then "your_directory"
        is created before saving if it does not exist.
        '''
        dirname = os.path.dirname(fname_base)
        if dirname == "":
            self._save(fname_base)
        else:
            if not os.path.isdir(dirname):
                os.mkdir(dirname)
            self._save(fname_base)

    def save_pickle(self, fname_base, overwrite=False):
        fname = fname_base + '_.gz'
        if os.path.exists(fname):
            if overwrite:
                warn(f"{fname} exsits already. It will be overwritten.")
            else:
                raise RuntimeError(
                    f"{fname} exists already. If you want to overwrite it, set \"overwrite=True\"."
                )
        data = pickle.dumps(self)
        with gzip.open(fname, mode='wb') as fp:
            fp.write(data)
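# Because the wrapper passes blobs_dtype through, the log-posterior must return
# a blob alongside the value; a toy sketch (the Gaussian target and the file
# names below are illustrative):
import numpy as np

def lnpost(p):
    lnlike = -0.5 * np.sum(p ** 2)
    return lnlike, lnlike          # (log posterior, blob carrying the log likelihood)

p0 = 0.1 * np.random.randn(32, 4)  # 32 walkers, 4 dimensions
s = Sampler(lnpost, p0, nwalkers=32, blobs_dtype=float)
s.sample(500, burnin=True)
s.sample(2000)
s.save("results/toy")              # writes results/toy_chain.npy, _lnprob.npy and _lnlike.npy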
Esempio n. 17
0
def sample(hdf5_file, niter, threads=None):
    """Sample from the DynamicalModel instance.

    Parameters
    ----------
    hdf5_file : str
        hdf5 filename
    niter : int
        number of iterations
    threads : int, optional
        number of threads to use
        if None, then default to the maximum allowed threads
    """
    slomo_version = io._version_string()
    hdf5_version = io.read_dataset(hdf5_file, "version")
    if slomo_version != hdf5_version:
        printf("Version of hdf5 file ({}) doesn't match the version of slomo"
               " ({})!".format(hdf5_version, slomo_version))
    model = io.read_model(hdf5_file)

    settings = io.read_group(hdf5_file, "settings")
    nwalkers = settings['nwalkers']
    ndim = len(model.params)

    max_threads = psutil.cpu_count(logical=False)
    if threads is None:
        threads = max_threads
    threads = int(threads)
    if threads < 0:
        threads = max_threads + threads
    if threads > 1:
        pool = multiprocess.Pool(threads)
    else:
        pool = None

    sampler = EnsembleSampler(nwalkers,
                              ndim,
                              model,
                              threads=threads,
                              pool=pool)

    if io.chain_shape(hdf5_file)[1] == 0:
        # starting new chain
        initial_guess = np.array(model.params._values)
        spread = 1e-4 * initial_guess
        positions = [
            initial_guess + spread * np.random.randn(ndim)
            for i in range(nwalkers)
        ]
    else:
        # override the given initial guess positions with the last walker positions
        positions = io.read_dataset(hdf5_file, "chain")[:, -1, :]

    count = 0
    start_time = time.time()
    for result in sampler.sample(positions, iterations=niter,
                                 storechain=False):
        printf('Iteration {:4d}: {:.4e} s'.format(count + 1,
                                                  time.time() - start_time))
        count += 1
        new_positions = result[0]
        io.append_to_chain(hdf5_file, new_positions)
    return sampler
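# A hedged call sketch; the hdf5 file name is a placeholder and the file must
# already contain the model, settings, and chain datasets the function reads.
sampler = sample("galaxy_model.hdf5", niter=500, threads=4)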
Esempio n. 18
0
File: sampler.py Progetto: l-sz/SiDE
def run_mcmc(main_dir,
             uvdata,
             paramfile='model_param.inp',
             nthreads=1,
             nthreads_openmp=1,
             nwalkers=40,
             nsteps=300,
             nburnin=0,
             use_mpi=False,
             loadbalance=False,
             verbose=False,
             resume=False,
             sloppy=False,
             chain_file='chain.dat',
             restart_file='chain.dat',
             impar=None,
             parname=None,
             p_ranges=None,
             p0=None,
             p_form=None,
             p_formprior=None,
             p_sigma=None,
             debug=False,
             kwargs=None,
             dpc=1.0,
             incl=60.):
    '''
    Computes posterior probabilities of parametrised Class 0/I models given 
    a set of observational constraints and radiative transfer model parameters.

    The observational constraints (visibility data) must be provided in the 
    uvdata argument. The radiative transfer model parameters must be provided 
    in the paramfile. The fitted parameters (specified in parname argument) 
    will be changed during the MCMC procedure. The initial values of the fitted 
    parameters are provided in the p0 argument. All other model parameters are 
    as set in the paramfile.
    
    In version 0.1.1 only uniform prior is possible (1 within the parameter range,
    0 outside of the range). The ranges of the uniform prior are given in the 
    p_ranges argument.

    The MCMC chains are saved in a Python readable pickle file (chain.p) and 
    in an ASCII file (chain.dat). The pickle file contains additional information 
    (e.g. uniform prior ranges, observational constraints, meta-data), but it is 
    only written at the successful finish of the MCMC run. The ASCII output 
    saves only the current parameters of the chains and the model likelihood, 
    but it is updated after each MCMC step. This file can be used to restart 
    runs that ended before completion (e.g. because the allocated time ran out 
    on the cluster).
    
    The resulting chain and meta-data is also returned as a dictionary.

    Parameters
    ----------
    main_dir : string
             Directory containing input and output files.
    uvdata   : dictionary or list of dictionaries
             Input observed visibility data at a single or multiple wavelengths.
             The chi^2 of the models is computed compared to the uvdata datasets.
             The dictionary should contain the u, v, Re, Im, w, wav keywords. 
    paramfile: string
             Name of the radmc3dModel parameter file. The parameter file is 
             necessary. Default is 'model_param.inp'.
    nthreads : int
             Number of threads used in multiprocessing mode. In MPI mode the 
             parameter is ignored. Default is 1.
    nthreads_openmp : int
             Number of OpenMP threads used by RADMC-3D and galario. It is used 
             both in MPI and multiprocessing modes. nthreads_openmp should not 
             be larger than the total number of CPU threads. Default is 1.
    nwalkers : int
             Number of walkers used. Default is 40.
    nsteps   : int
             Number of walker steps in the main run. Default is 300.
    nburnin  : int
             Number of walker steps in initial "burn-in" run. Default is 0.
    use_mpi  : bool
             Use MPI pools instead of python threads. Useful for running 
             on computer clusters using multiple nodes. Default is False.
    loadbalance : bool
             When the MPI mode is used and the runtime of individual log-probability
             function calls varies significantly and ntask > Ncpu, then setting this
             parameter to True may improve the overall computational speed. 
             Default is False.
    verbose  : bool
             If True then write detailed information messages to the standard 
             output. Default is False.
    resume   : bool
             If True then resume MCMC chain from file and continue sampling 
             the posterior distribution. nwalkers should not change between 
             runs. Default is False.
    sloppy   : bool
             If True then RADMC-3D relaxes the sub-pixel refinement criterion 
             when raytracing. This may reduce runtime. Please do not forget to 
             check best-fit models with higher accuracy before publishing results.
             Default is False.
    chain_file : string, optional
             Chain (parameters and probabilities) are stored in this file. 
             The file can be used to restart or continue MCMC sampling.
             Meaning of columns: walker index (1), parameter value (n), 
             log probability (1).
             If the file already exists then the output is automatically renamed, 
             this is done in order not to overwrite previous results and 
             the restart_file (see below).
             Default is "chain.dat".
    restart_file: string, optional
             When restarting (resume = True), then results from previous 
             run are read from restart_file. If resume parameter is set 
             True, then file must exist.
             Default is "chain.dat".
    impar    : dict or list of dict, optional
             Image parameter(s). Known keywords are listed in the runImage()
             method description. At least the wavelength (wav keyword) must 
             be set for each image. Default is None.
    parname  : list, string
             Names of fitted parameters (string). The known parameter names are 
             those given in the parameter file. Required to run the fitting. 
             Default is None.
    p_ranges : list of lists
             Uniform prior parameter ranges. For each parameter it should contain 
             a two element list with the minimum and maximum values. Within these 
             ranges the prior probability is 1, outside it is 0. Must have as 
             many elements as p0.
             Default is None.
    p0       : list
             Initial values of the fitted parameters. Should have exactly as many 
             elements as the parname list. The p0 values should be within the 
             corresponding p_range.
             Default is None.
    p_form   : list
             Sets whether p[i] is logarithmic (i.e. that val = 10**p[i]) or linear
             (val = p[i]). Must have as many elements as p0.
             Default is None.
    p_formprior : list
             Sets the functional form of the prior probability distribution. It 
             should be set to 'normal' or 'uniform' for a Gaussian or rectangular 
             distribution, respectively. Must have exactly as many elements as p0.
             Default is None.
    p_sigma  : list
             Width of the Gaussian function. Must have exactly as many elements 
             as p0. If p_formprior[i] is 'uniform', then p_sigma[i] is not used.
             Default is None.
    debug    : bool
             Passes debug argument to the emcee module. If set then more information 
             is written to standard output about the MPI processes.
             Default is False.
    kwargs   : dict
             Dictionary containing keyword arguments for the lnpostfn() function.
             For details see the docstring of bayes.lnpostfn(). 
             Important: the dpc, incl, PA, dRA, dDec parameters given as an 
             argument to lnpostfn() overwrite the corresponding values given in 
             impar. If kwargs is not set, then these will be overwritten by the 
             lnpostfn() default arguments!
             Default is None.
    dpc      : float
             Distance to the modelled object in parsecs. If dpc is not defined 
             in kwargs, then this value is used.
             Default is 1.0.
    incl     : float
             Inclination of the model in degrees. If incl is not a fit parameter 
             or not set in kwargs, then this value is used.
             Default is 60.
    '''
    if use_mpi:
        version = mpi4py.MPI.Get_version()
        comm = mpi4py.MPI.COMM_WORLD
        size = comm.Get_size()
        rank = comm.Get_rank()
        universe_size = comm.Get_attr(mpi4py.MPI.UNIVERSE_SIZE)

        print(
            'INFO [{:06}]: MPI: {} (version), {} (size), {} (rank), {} (universe)'
            .format(0, version, size, rank, universe_size))

        pool = MPIPool(comm=comm, debug=debug, loadbalance=loadbalance)

        if not pool.is_master():
            os.chdir(main_dir)
            pool.wait()
            sys.exit(0)

        nthreads = pool.size
    else:
        pool = None

    # Change to main_dir in order to find the UV obs. data files
    current_dir = os.path.realpath('.')
    os.chdir(main_dir)

    print(
        "INFO [{:06}]: nthreads [{}], nthreads_openmp [{}], nwalkers [{}], nburnin [{}], nsteps [{}]"
        .format(0, nthreads, nthreads_openmp, nwalkers, nburnin, nsteps))
    print("INFO [{:06}]: USE_MPI is {}".format(0, use_mpi))

    # Check for minimum function call arguments
    if not uvdata:
        raise ValueError('uvdata must be provided!')

    if paramfile is None:
        print(
            'WARN [{:06}]: paramfile not provided, using defaults!'.format(0))

    if impar is None:
        print(
            'WARN [{:06}]: impar is not provided, computing parameters from uvdata!'
            .format(0))

    if parname is None:
        raise ValueError('parname must be provided!')
    elif type(parname) is not list:
        raise TypeError('parname must be a list!')

    if p_ranges is None:
        raise ValueError('p_ranges must be provided!')
    elif type(p_ranges) is not list:
        raise TypeError('p_ranges must be a list!')
    elif len(p_ranges) != len(parname):
        raise ValueError('len(p_ranges) [{:d}] != len(parname) [{:d}]'.format(
            len(p_ranges), len(parname)))

    if p0 is None:
        raise ValueError('p0 must be provided!')

    if p_form is None:
        raise ValueError('p_form must be provided!')

    if p_formprior is None:
        raise ValueError('p_formprior must be provided!')

    if p_sigma is None:
        raise ValueError('p_sigma must be provided!')

    # Read parameter file
    par = main.getParams(paramfile=paramfile)

    # Set parameters for bayes.lnpostfn() function
    if kwargs is None:
        print(
            'WARN [{:06}]: kwargs is not provided, using defaults!'.format(0))
        print('WARN [{:06}]: using dpc  = {:6.2f}'.format(0, dpc))
        print('WARN [{:06}]: using incl = {:6.2f}'.format(0, incl))

        kwargs = {
            'dpc': dpc,
            'incl': incl,
            'verbose': verbose,
            'PA': 0.0,
            'dRA': 0.0,
            'dDec': 0.0,
            'idisk': True,
            'ienv': True,
            'icav': False,
            'islab': False,
            'cleanModel': True,
            'binary': True,
            'chi2_only': True,
            'galario_check': False,
            'time': True
        }

    # Set image parameters if not provided
    if impar is None:
        impar = []

        if type(uvdata) is dict:
            uvdata = [uvdata]

        for dset in uvdata:
            wav_ = dset['wav']
            wav_m = wav_ * 1.0e-6
            npix_, dpix_ = galario.double.get_image_size(
                dset['u'] / wav_m, dset['v'] / wav_m)
            dpix_au = dpix_ / galario.arcsec * kwargs['dpc']
            sizeau_ = npix_ * dpix_au

            impar.append({
                'npix': npix_,
                'wav': wav_,
                'sizeau': sizeau_,
                'incl': kwargs['incl']
            })
            print(
                '''INFO [{:06}]: visibility dataset found: npix = {}, sizeau = {:.2f}, wav = {:.2f}'''
                .format(0, npix_, sizeau_, wav_))

    # Set sloppynes
    for ip in impar:
        ip['sloppy'] = sloppy

    # Update kwargs keys if needed
    kwargs['verbose'] = verbose
    kwargs['impar'] = impar
    kwargs['nthreads'] = nthreads_openmp

    # Number of fitted parameters
    ndim = len(p_ranges)

    # initialize the walkers with an ndim-dimensional Gaussian ball
    if resume:
        resume_data = tools.read_chain_ascii(restart_file)
        if nwalkers != resume_data.nwalkers:
            raise ValueError(
                'ERROR: walker number does not match resume file.')
        pos = []
        for pv in resume_data.chain[:, -1, :]:
            pos.append(pv)
        lnprob0 = resume_data.lnprob[:, -1]
    else:
        pos = [p0 + 1.0e-2 * np.random.randn(ndim) for i in range(nwalkers)]
        lnprob0 = None

    nsteps += nburnin  # set total steps in chain

    ## Create chain_file, if already exists then rename output
    while os.path.isfile(chain_file):
        counter = 0
        selem = chain_file.split('.')
        main_name = ''.join(selem[0:-1])
        try:
            selem2 = main_name.split('_')
            counter = int(selem2[-1]) + 1
            main_name = ''.join(selem2[0:-1])
        except ValueError:
            pass
        chain_file = '{}_{}.{}'.format(main_name, counter, selem[-1])

    # Create chain file and write header
    f = open(chain_file, "w")
    f.write(
        '# nwalkers = {:3d}, nthreads = {:3d}, nthreads_openmp = {:3d}, nsteps = {:5d}, MPI = {}\n'
        .format(nwalkers, nthreads, nthreads_openmp, nsteps, use_mpi))
    f.write('# i_walker {}  lnprob\n'.format(''.join(
        np.vectorize(" %s".__mod__)(parname))))
    f.close()

    # Create and run sampler
    sampler = EnsembleSampler(nwalkers,
                              ndim,
                              bayes.lnpostfn,
                              args=[
                                  p_form, p_ranges, p_formprior, p0, p_sigma,
                                  parname, par, main_dir, uvdata
                              ],
                              kwargs=kwargs,
                              threads=nthreads,
                              pool=pool)

    print("INFO [{:06}]: RUN {} main steps".format(0, nsteps))
    print("INFO [{:06}]: status info at every 100 steps:".format(0))

    f = open(chain_file, "a")

    for step in sampler.sample(pos, iterations=nsteps, lnprob0=lnprob0):
        position = step[0]
        lnprob = step[1]
        # Write restart file
        for k in range(nwalkers):
            posstr = ''.join(np.vectorize("%12.5E ".__mod__)(position[k]))
            f.write("{:04d} {:s}{:12.5E}\n".format(k, posstr, lnprob[k]))
        f.flush()
        # Run garbage collection
        gc.collect()

    f.close()

    print("INFO [{:06}]: DONE {} main steps".format(0, nsteps))

    # Close pool
    if use_mpi:
        pool.close()

    # Extract results
    chain = sampler.chain[:, :, :]
    accept_frac = sampler.acceptance_fraction
    lnprob = sampler.lnprobability

    if resume:
        chain = np.concatenate((resume_data.chain, chain), axis=1)
        nsteps = nsteps + resume_data.chain.shape[1]

    # Save results
    results = {
        'chain': chain,
        'accept_frac': accept_frac,
        'lnprob': lnprob,
        'parname': parname,
        'p_ranges': p_ranges,
        'p0': p0,
        'p_form': p_form,
        'p_formprior': p_formprior,
        'p_sigma': p_sigma,
        'ndim': ndim,
        'nwalkers': nwalkers,
        'nthreads': nthreads,
        'nsteps': nsteps,
        'nburnin': nburnin,
        'uvdata': uvdata,
        'impar': impar
    }

    # Save chain and metadata
    # Note that protocol=2 is needed for python 2/3 compatibility
    pickle.dump(results, open('chain.p', 'wb'), protocol=2)

    # Return
    os.chdir(current_dir)
    return results
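# A minimal, hypothetical call of run_mcmc: the parameter names, ranges, and
# starting values below are placeholders and must match entries in the
# RADMC-3D parameter file.
results = run_mcmc('/path/to/run_dir', uvdata,
                   paramfile='model_param.inp',
                   nwalkers=40, nsteps=300, nburnin=100,
                   parname=['mdisk', 'rho0Env'],            # hypothetical fit parameters
                   p0=[-4.0, -10.0],                        # initial (logarithmic) values
                   p_ranges=[[-6.0, -2.0], [-12.0, -8.0]],  # uniform prior ranges
                   p_form=['log', 'log'],
                   p_formprior=['uniform', 'uniform'],
                   p_sigma=[0.0, 0.0],
                   dpc=140.0, incl=60.0)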