Example #1
def optimize():
    import numpy as np
    from emcee import EnsembleSampler
    import multiprocessing as mp

    ndim = 4
    nwalkers = 4 * ndim

    # Initial walker positions: one row per walker, columns are the four parameters
    p0 = np.array(
        [
            np.random.uniform(1000, 5000, nwalkers),
            np.random.uniform(0.1, 1.0, nwalkers),
            np.random.uniform(2, 12, nwalkers),
            np.random.uniform(0.1, 1.5, nwalkers),
        ]
    ).T

    # emcee 2.x parallelism; emcee 3 replaced `threads` with `pool`
    sampler = EnsembleSampler(nwalkers, ndim, lnprob, threads=mp.cpu_count())

    pos, prob, state = sampler.run_mcmc(p0, 1000)
    sampler.reset()
    print("Burned in")

    # actual run
    pos, prob, state = sampler.run_mcmc(pos, 1000)

    # Save the last position of the walkers
    np.save("walkers_emcee.npy", pos)
    np.save("eparams_emcee.npy", sampler.flatchain)
Example #2
    def do_mcmc(self,
                nwalker=100,
                nburn=50,
                nchain=50,
                threads=1,
                set_prior=True):

        # initial walkers for MCMC
        ndim = 2
        pinit = np.zeros((nwalker, ndim))
        pinit[:, 0] = np.random.uniform(-10, -2, nwalker)
        pinit[:, 1] = np.random.uniform(np.log10(self.lc.dt_min / 10),
                                        np.log10(self.lc.dt_tot * 10), nwalker)

        # start sampling
        sampler = EnsembleSampler(nwalker,
                                  ndim,
                                  self.lnprob,
                                  args=(self.lc, set_prior),
                                  threads=threads)
        # burn-in
        pos, prob, state = sampler.run_mcmc(pinit, nburn)
        sampler.reset()
        # actual samples
        sampler.run_mcmc(pos, nchain, rstate0=state)
        self.sampler = sampler
        self.flatchain = sampler.flatchain
        self.lnprobability = sampler.lnprobability
Example #3
    def runsample(self, sed_obs, sed_obs_err, vpi_obs, vpi_obs_err,
                  Lvpi=1.0, Lprior=1.0, nsteps=(1000, 1000, 2000), p0try=None):

        ndim = 4                # 4 stands for [Teff, logg, Av, DM]
        nwalkers = len(p0try)   # number of chains

        for i in range(len(nsteps)):
            if i == 0:
                # initialize sampler
                sampler = EnsembleSampler(nwalkers, ndim, costfun,
                                          args=(self.r, self.p_bounds,
                                                self.Alambda, sed_obs,
                                                sed_obs_err, vpi_obs,
                                                vpi_obs_err, Lvpi, Lprior))
                # guess Av and DM for p0try
                p0try = np.array([initial_guess(_, self.r, self.Alambda, sed_obs, sed_obs_err) for _ in p0try])
                # run sampler
                pos, _, __ = sampler.run_mcmc(p0try, nsteps[i])
            else:
                # generate new p
                p_rand = random_p(sampler, nloopmax=1000, method="mle",
                                  costfun=costfun, args=(self.r, self.p_bounds,
                                                         self.Alambda, sed_obs,
                                                         sed_obs_err, vpi_obs,
                                                         vpi_obs_err,
                                                         Lvpi, Lprior))
                # reset sampler
                sampler.reset()
                # run at new p
                pos1, lnprob1, rstate1 = sampler.run_mcmc(p_rand, nsteps[i])
        return sampler
Example #4
    def mcmc(self, n_walkers, n_iter, n_burnin, lnprob, args, pos0, chain_labels,
             pool=None, progress=True, out_file=None):
        """
        PARAMETERS
        ----------
        `n_walkers` (int): the number of walkers to use
        `n_iter` (int): the number of sample iterations to perform post burn-in
        `n_burnin` (int): the number of burn-in steps to perform
        `lnprob` (func): function returning the log-posterior probability
        `args` (tuple): arguments to be passed to `lnprob`
        `pos0` (list-like): list of initial walker positions
        `chain_labels` (list of str): list of column labels for the sample chains
        `pool` (optional): a pool object with a `map` method, passed to the
            sampler for parallel evaluation of `lnprob`
        `progress` (bool, optional): whether to display a progress bar while sampling
        `out_file` (str, optional): the user has the option to save the sample
            chains and blobs to a csv or pickle file. This is the path to the
            output filename.

        RETURNS
        -------
        `sampler`: the `EnsembleSampler` instance used for sampling
        `output`: a pandas DataFrame containing all the sample chains and blobs

        """
       
        n_dim = len(chain_labels)
        sampler = EnsembleSampler(n_walkers, n_dim, lnprob, args=args,
                                  pool=pool,
                                  blobs_dtype=[("star", pd.Series)])

        # Burn-in phase
        if n_burnin != 0:
            print("Burn-in phase...", end="\r")
            pos, prob, state, blobs = sampler.run_mcmc(pos0, n_burnin)
            sampler.reset()
        else:
            pos = pos0

        # Sampling phase
        pos, prob, state, blobs = sampler.run_mcmc(pos, n_iter, 
                                                   progress=progress)

        samples = pd.DataFrame(sampler.flatchain, columns=chain_labels)
        blobs = sampler.get_blobs(flat=True)
        blobs = pd.concat(blobs["star"], axis=1).T

        output = pd.concat([samples, blobs], axis=1)
        if out_file is not None:
            if "csv" in out_file:
                output.to_csv(out_file, index=False)
            else:
                output.to_pickle(out_file)

        return sampler, output
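The `blobs_dtype=[("star", pd.Series)]` argument above implies that `lnprob` returns a `pandas.Series` blob alongside the log-probability. A hypothetical sketch of a compatible log-posterior (the linear model, parameter names, and blob contents are illustrative only, not the original code):

import numpy as np
import pandas as pd

def lnprob(theta, x_obs, y_obs, y_err):
    # Returns (log-probability, blob); emcee stores the blob for each sample.
    slope, intercept = theta
    if not (-10 < slope < 10 and -10 < intercept < 10):  # flat prior box
        return -np.inf, pd.Series(dtype=float)
    resid = (y_obs - (slope * x_obs + intercept)) / y_err
    lnlike = -0.5 * np.sum(resid ** 2)
    return lnlike, pd.Series({"chi2": float(np.sum(resid ** 2))})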
Example #5
    def __call__(self, nw=None, nt=None, nb=None, ns=None):
        if nw is None:
            nw = self.nWalkers
        else:
            self.nWalkers = nw
            self._initial_parameters()
        if nt is None:
            nt = self.nThreads
        if nb is None:
            nb = self.nBurnin
        if ns is None:
            ns = self.nSteps

        # setup emcee sampler
        sampler = EnsembleSampler(nw, self.nDim, self.lnProb, threads=nt)

        if nb:
            # Run burn-in steps
            pos, prob, state = sampler.run_mcmc(self.pos0, nb)

            # Reset the chain to remove the burn-in samples
            sampler.reset()

            # from the final position in burn-in chain, sample for nsteps
            sampler.run_mcmc(pos, ns, rstate0=state)
        else:
            # sample for nsteps
            sampler.run_mcmc(self.pos0, ns)

        samples = sampler.flatchain
        lnprobs = sampler.flatlnprobability

        indxs = np.where(lnprobs > -float_info.max)[0]
        if self.scale == 'linear':
            samples = samples[indxs]
        elif self.scale == 'log':
            samples = np.power(10, samples[indxs])
        else:
            raise ValueError("prior scale must be 'linear' or 'log'")
        lnprobs = lnprobs[indxs]

        # locate the maximum-probability sample
        lnp_max = max(lnprobs)
        indmax = np.where(lnprobs == lnp_max)[0][0]

        vals = samples[indmax]

        return vals, samples, lnprobs
Example #6
	def MCMC(self, nwalkers=50, nburn=200, nMCMC=1000, use_MPI=False, chain_file='chain.dat', fig_name='./MCMC_corner.png', plot_corner=False, **kwargs):
		# The function to carry out MCMC. 
		# parameters:
		# 	nwalkers: int, optional
		# 		the number of walkers in MCMC, which must be even. 
		# 		default: 50
		# 	nburn: int, optional
		# 		the number of burn-in steps in MCMC.
		# 		default: 200
		# 	nMCMC: int, optional
		# 		the number of final MCMC steps in MCMC.
		# 		default: 1000
		# 	use_MPI: Boolean, optional
		# 		whether to use MPI. 
		# 		default: False
		# returns:
		# 	p_best: array_like
		# 		best fitting parameter set.
		
		# Initialize the walkers with a set of initial points, p0.
		E0 = np.random.normal(0.5, 0.3, size=nwalkers)
		T0 = np.random.normal(0.5, 0.3, size=nwalkers)
		a0 = np.random.normal(0, 0.7, size=nwalkers)
		covEE = truncnorm.rvs(0, 1, loc=0.3, scale=0.1, size=nwalkers)
		covTT = truncnorm.rvs(0, 1, loc=0.3, scale=0.1, size=nwalkers)
		covaa = truncnorm.rvs(0, 1, loc=0.1, scale=0.1, size=nwalkers)
		covEa = truncnorm.rvs(0, 1, loc=0.1, scale=0.1, size=nwalkers)
		p0 = [[E0[i], T0[i], a0[i], covEE[i], covTT[i], covaa[i], covEa[i]] for i in range(nwalkers)]
		print('start MCMC.')


		if not use_MPI:
			sampler = EnsembleSampler(nwalkers, self.ndim, lnprob, **kwargs)	
			# sampler = EnsembleSampler(nwalkers, self.ndim, lnprob, \
			# 		  args=(self.E_grid, self.T_grid, self.a_grid, self.ba_lgSMA_bins, self.bin_obs, self.num_obs), **kwargs)	

		# When using MPI, we differentiate between different processes.
		else:
			pool = MPIPool()
			if not pool.is_master():
				pool.wait()
				sys.exit(0)
			# sampler = EnsembleSampler(nwalkers, self.ndim, lnprob, \
			# 		  args=(self.E_grid, self.T_grid, self.a_grid, self.ba_lgSMA_bins, self.bin_obs, self.num_obs), pool=pool, **kwargs)
			sampler = EnsembleSampler(nwalkers, self.ndim, lnprob,  pool=pool, **kwargs)

		# burn-in phase
		pos, prob, state = sampler.run_mcmc(p0, nburn, chain_file=chain_file)
		sampler.reset()

		# MCMC phase
		sampler.run_mcmc(pos, nMCMC, chain_file=chain_file)

		if use_MPI:
			pool.close()
		
		# If we want to make classic corner plots...
		if plot_corner:
			samples = sampler.chain[:, nMCMC // 2:, :].reshape((-1, self.ndim))
			fig = corner.corner(samples, labels=['E', 'T', 'a', 'covEE', 'covTT', 'covaa', 'covEa'])
			fig.savefig(fig_name)

		# Get the best fitting parameters. We take the median parameter value for the ensemble
		# of steps with log-probabilities within the largest 30% among the whole ensemble as the
		# best parameters.
		samples = sampler.flatchain
		lnp = sampler.flatlnprobability
		crit_lnp = np.percentile(lnp, 70)
		good = np.where(lnp > crit_lnp)
		p_best = [np.median(samples[good, i]) for i in range(self.ndim)]

		return np.array(p_best)
Example #7
    def MCMC(self):
        """
        Run MCMC

        Explore the parameter space around the current pL with MCMC

        Adds the following attributes to the transit model structure:

        upL0  : 3x3 array of the 15, 50, and 85-th percentiles of
                Rp/Rstar, tau, and b
        chain : nsamp x 3 array of the parameters tried in the MCMC chain.
        fits  : Light curve fits selected randomly from the MCMC chain.
        """

        # MCMC parameters
        nwalkers = 10
        ndims = 3
        nburn = 1000
        niter = 2000
        print("""\
running MCMC
------------
%6i walkers
%6i step burn in
%6i step run
""" % (nwalkers, nburn, niter))

        # Initialize walkers
        pL = self.pdict2pL(self.pdict)
        fltpars = [k for k in list(self.pdict.keys()) if not self.fixdict[k]]
        allpars = list(self.pdict.keys())
        p0 = np.vstack([pL] * nwalkers)
        for i, name in zip(list(range(ndims)), fltpars):
            if name == 'p':
                p0[:, i] += 1e-4 * np.random.randn(nwalkers)
            elif name == 'tau':
                p0[:, i] += 1e-2 * pL[i] * np.random.random(nwalkers)
            elif name == 'b':
                p0[:, i] = 0.8 * np.random.random(nwalkers) + .1

        # Burn in
        sampler = EnsembleSampler(nwalkers, ndims, self)
        pos, prob, state = sampler.run_mcmc(p0, nburn)

        # Real run
        sampler.reset()
        foo = sampler.run_mcmc(pos, niter, rstate0=state)

        chain = pd.DataFrame(sampler.flatchain, columns=fltpars)
        uncert = pd.DataFrame(index=['15,50,85'.split(',')], columns=allpars)
        for k in list(self.pdict.keys()):
            if self.fixdict[k]:
                chain[k] = self.pdict[k]
                uncert[k] = self.pdict[k]
            else:
                uncert[k] = np.percentile(chain[k], [15, 50, 85])

        nsamp = 200
        ntrial = sampler.flatchain.shape[0]
        # np.random.random_integers is removed in modern NumPy; randint's
        # upper bound is exclusive, so this draws from [0, ntrial - 1].
        idx = np.random.randint(0, ntrial, nsamp)

        f = lambda i: self.MA(self.pL2pdict(sampler.flatchain[i]), self.t)
        fits = np.vstack(list(map(f, idx)))

        uncert = uncert.to_records(index=False)
        chain = chain.to_records(index=False)

        self.add_dset('uncert', uncert, description='uncertainties')
        self.add_dset('chain', chain, description='MCMC chain')
        self.add_dset('fits', fits, description='Fits from MCMC chain')
        self.completed_mcmc = 1  # Note that MCMC was successful
Example #8
class LPFunction(object):
    """A basic log posterior function class.
    """
    def __init__(self, time, flux, nthreads=1):

        # Set up the transit model
        # ------------------------
        self.tm = MA(interpolate=True, klims=(0.08, 0.13), nthr=nthreads)
        self.nthr = nthreads

        # Initialise data
        # ---------------
        self.time = time.copy() if time is not None else array([])
        self.flux_o = flux.copy() if flux is not None else array([])
        self.npt = self.time.size

        # Set the optimiser and the MCMC sampler
        # --------------------------------------
        self.de = None
        self.sampler = None

        # Set up the parametrisation and priors
        # -------------------------------------
        psystem = [
            GParameter('tc', 'zero_epoch', 'd', NP(1.01, 0.02), (-inf, inf)),
            GParameter('pr', 'period', 'd', NP(2.50, 1e-7), (0, inf)),
            GParameter('rho', 'stellar_density', 'g/cm^3', UP(0.90, 2.50),
                       (0.90, 2.5)),
            GParameter('b', 'impact_parameter', 'R_s', UP(0.00, 1.00),
                       (0.00, 1.0)),
            GParameter('k2', 'area_ratio', 'A_s', UP(0.08**2, 0.13**2),
                       (1e-8, inf))
        ]

        pld = [
            PParameter('q1', 'q1_coefficient', '', UP(0, 1), bounds=(0, 1)),
            PParameter('q2', 'q2_coefficient', '', UP(0, 1), bounds=(0, 1))
        ]

        pbl = [
            LParameter('es',
                       'white_noise',
                       '',
                       UP(1e-6, 1e-2),
                       bounds=(1e-6, 1e-2))
        ]
        per = [
            LParameter('bl',
                       'baseline',
                       '',
                       NP(1.00, 0.001),
                       bounds=(0.8, 1.2))
        ]

        self.ps = ParameterSet()
        self.ps.add_global_block('system', psystem)
        self.ps.add_passband_block('ldc', 2, 1, pld)
        self.ps.add_lightcurve_block('baseline', 1, 1, pbl)
        self.ps.add_lightcurve_block('error', 1, 1, per)
        self.ps.freeze()

    def compute_baseline(self, pv):
        """Constant baseline model"""
        return full_like(self.flux_o, pv[8])

    def compute_transit(self, pv):
        """Transit model"""
        _a = as_from_rhop(
            pv[2], pv[1]
        )  # Scaled semi-major axis from stellar density and orbital period
        _i = mt.acos(
            pv[3] /
            _a)  # Inclination from impact parameter and semi-major axis
        _k = mt.sqrt(pv[4])  # Radius ratio from area ratio

        a, b = mt.sqrt(pv[5]), 2 * pv[6]
        _uv = array([a * b,
                     a * (1. - b)])  # Quadratic limb darkening coefficients

        return self.tm.evaluate(self.time, _k, _uv, pv[0], pv[1], _a, _i)

    def compute_lc_model(self, pv):
        """Combined baseline and transit model"""
        return self.compute_baseline(pv) * self.compute_transit(pv)

    def lnprior(self, pv):
        """Log prior"""
        if any(pv < self.ps.lbounds) or any(pv > self.ps.ubounds):
            return -inf
        else:
            return self.ps.lnprior(pv)

    def lnlikelihood(self, pv):
        """Log likelihood"""
        flux_m = self.compute_lc_model(pv)
        return ll_normal_es(self.flux_o, flux_m, pv[7])

    def lnposterior(self, pv):
        """Log posterior"""
        lnprior = self.lnprior(pv)
        if isinf(lnprior):
            return lnprior
        else:
            return lnprior + self.lnlikelihood(pv)

    def create_pv_population(self, npop=50):
        return self.ps.sample_from_prior(npop)

    def optimize(self,
                 niter=200,
                 npop=50,
                 population=None,
                 label='Optimisation'):
        """Global optimisation using Differential evolution"""
        if self.de is None:
            self.de = DiffEvol(self.lnposterior,
                               clip(self.ps.bounds, -1, 1),
                               npop,
                               maximize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label):
            pass

    def sample(self, niter=500, thin=5, label='MCMC sampling', reset=False):
        """MCMC sampling using emcee"""
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par,
                                           self.lnposterior)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin),
                      total=niter,
                      desc=label):
            pass
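A hypothetical end-to-end use of this class (the time and flux arrays here are synthetic placeholders, not data from the original source):

from numpy import linspace, ones

time = linspace(0.9, 1.1, 500)   # placeholder mid-exposure times [d]
flux = ones(500)                 # placeholder normalised fluxes
lpf = LPFunction(time, flux)
lpf.optimize(niter=100)          # differential evolution first
lpf.sample(niter=250)            # then emcee, seeded from the DE population
chain = lpf.sampler.flatchain    # flattened posterior samples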
Example #9
class BaseLPF:
    _lpf_name = 'BaseLPF'

    def __init__(self, name: str, passbands: list, times: list = None, fluxes: list = None, errors: list = None,
                 pbids: list = None, covariates: list = None, wnids: list = None, tm: TransitModel = None,
                 nsamples: tuple = 1, exptimes: tuple = 0., init_data=True, result_dir: Path = None):
        """The base Log Posterior Function class.

        The `BaseLPF` class creates the basis for transit light curve analyses using `PyTransit`. This class can be
        used in a basic analysis directly, or it can be inherited to create a basis for a more complex analysis.

        Parameters
        ----------
        name: str
            Name of the log posterior function instance.

        passbands: iterable
            List of unique passband names (filters) that the light curves have been observed in.

        times: iterable
            List of 1d ndarrays each containing the mid-observation times for a single light curve.

        fluxes: iterable
            List of 1d ndarrays each containing the normalized fluxes for a single light curve.

        errors: iterable
            List of 1d ndarrays each containing the flux measurement uncertainties for a single light curve.

        pbids: iterable of ints
            List of passband indices mapping each light curve to a single passband.

        covariates: iterable
            List of covariates, one 2D ndarray per light curve.

        wnids: iterable of ints
            List of noise set indices mapping each light curve to a single noise set.

        tm: TransitModel
            Transit model to use instead of the default model.

        nsamples: list[int]
            List of supersampling factors.  The values should be integers and given one per light curve.

        exptimes: list[float]
            List of exposure times. The values should be floats with the time given in days.

        init_data: bool
            Set to `False` to allow the LPF to be initialized without data. This is mainly for debugging.

        result_dir: Path
            Default saving directory
        """

        self._pre_initialisation()

        self.tm = tm or QuadraticModel(klims=(0.01, 0.75), nk=512, nz=512)

        # LPF name
        # --------
        self.name = name
        self.result_dir = result_dir

        # Passbands
        # ---------
        # Should be arranged from blue to red
        if isinstance(passbands, (list, tuple, ndarray)):
            self.passbands = passbands
        else:
            self.passbands = [passbands]
        self.npb = npb = len(self.passbands)

        self.nsamples = None
        self.exptimes = None

        # Declare high-level objects
        # --------------------------
        self.ps = None          # Parametrisation
        self.de = None          # Differential evolution optimiser
        self.sampler = None     # MCMC sampler
        self.instrument = None  # Instrument
        self.ldsc = None        # Limb darkening set creator
        self.ldps = None        # Limb darkening profile set
        self.cntm = None        # Contamination model

        # Declare data arrays and variables
        # ---------------------------------
        self.nlc: int = 0                # Number of light curves
        self.n_noise_blocks: int = 0     # Number of noise blocks
        self.noise_ids = None
        self.times: list = None          # List of time arrays
        self.fluxes: list = None         # List of flux arrays
        self.errors: list = None         # List of flux uncertainties
        self.covariates: list = None     # List of covariates
        self.wn: ndarray = None          # Array of white noise estimates for each light curve
        self.timea: ndarray = None       # Array of concatenated times
        self.mfluxa: ndarray = None      # Array of concatenated model fluxes
        self.ofluxa: ndarray = None      # Array of concatenated observed fluxes
        self.errora: ndarray = None      # Array of concatenated flux uncertainties

        self.lcids: ndarray = None       # Array of light curve indices for each datapoint
        self.pbids: ndarray = None       # Array of passband indices for each light curve
        self.lcslices: list = None       # List of light curve slices

        self._local_minimization = None

        # Initialise the additional lnprior list
        # --------------------------------------
        self.lnpriors = []

        if init_data:
            # Set up the observation data
            # ---------------------------
            self._init_data(times = times, fluxes = fluxes, pbids = pbids, covariates = covariates,
                            errors = errors, wnids = wnids, nsamples = nsamples, exptimes = exptimes)

            # Set up the parametrisation
            # --------------------------
            self._init_parameters()

            # Initialise the instrument
            # --------------------------
            self._init_instrument()

        self._post_initialisation()


    def _init_data(self, times, fluxes, pbids=None, covariates=None, errors=None, wnids = None, nsamples=1, exptimes=0.):

        if isinstance(times, ndarray) and times.ndim == 1 and times.dtype == float:
            times = [times]

        if isinstance(fluxes, ndarray) and fluxes.ndim == 1 and fluxes.dtype == float:
            fluxes = [fluxes]

        if pbids is None:
            if self.pbids is None:
                self.pbids = zeros(len(fluxes), int)
        else:
            self.pbids = atleast_1d(pbids).astype('int')

        self.nlc = len(times)
        self.times = times
        self.fluxes = fluxes
        self.wn = [nanstd(diff(f)) / sqrt(2) for f in fluxes]
        self.timea = concatenate(self.times)
        self.ofluxa = concatenate(self.fluxes)
        self.mfluxa = zeros_like(self.ofluxa)
        self.lcids = concatenate([full(t.size, i) for i, t in enumerate(self.times)])

        # TODO: Noise IDs get scrambled when removing transits, fix!!!
        if wnids is None:
            if self.noise_ids is None:
                self.noise_ids = zeros(self.nlc, int)
                self.n_noise_blocks = 1
        else:
            self.noise_ids = asarray(wnids)
            self.n_noise_blocks = len(unique(self.noise_ids))
            assert self.noise_ids.size == self.nlc, "Need one noise block id per light curve."
            assert self.noise_ids.max() == self.n_noise_blocks - 1, "Error initialising noise block ids."

        if isscalar(nsamples):
            self.nsamples = full(self.nlc, nsamples)
            self.exptimes = full(self.nlc, exptimes)
        else:
            assert (len(nsamples) == self.nlc) and (len(exptimes) == self.nlc)
            self.nsamples = asarray(nsamples, 'int')
            self.exptimes = asarray(exptimes)

        self.tm.set_data(self.timea, self.lcids, self.pbids, self.nsamples, self.exptimes)

        if errors is None:
            self.errors = array([full(t.size, nan) for t in self.times])
        else:
            self.errors = asarray(errors)
        self.errora = concatenate(self.errors)

        # Initialise the light curves slices
        # ----------------------------------
        self.lcslices = []
        sstart = 0
        for i in range(self.nlc):
            s = self.times[i].size
            self.lcslices.append(s_[sstart:sstart + s])
            sstart += s

        # Initialise the covariate arrays, if given
        # -----------------------------------------
        if covariates is not None:
            self.covariates = covariates
            # Normalise each covariate array; assigning to the loop variable
            # alone would not modify the stored arrays.
            for i, cv in enumerate(self.covariates):
                self.covariates[i] = (cv - cv.mean(0)) / cv.std(0)
            #self.ncovs = self.covariates[0].shape[1]
            #self.covsize = array([c.size for c in self.covariates])
            #self.covstart = concatenate([[0], self.covsize.cumsum()[:-1]])
            #self.cova = concatenate(self.covariates)

    def print_parameters(self, columns: int = 2):
        columns = max(1, columns)
        for i, p in enumerate(self.ps):
            print(p.__repr__(), end=('\n' if i % columns == columns - 1 else '\t'))

    def _init_parameters(self):
        self.ps = ParameterSet()
        self._init_p_orbit()
        self._init_p_planet()
        self._init_p_limb_darkening()
        self._init_p_baseline()
        self._init_p_noise()
        self.ps.freeze()

    def _init_p_orbit(self):
        """Orbit parameter initialisation.
        """
        porbit = [
            GParameter('tc',  'zero_epoch',       'd',      N(0.0,  0.1), (-inf, inf)),
            GParameter('p',   'period',           'd',      N(1.0, 1e-5), (0,    inf)),
            GParameter('rho', 'stellar_density',  'g/cm^3', U(0.1, 25.0), (0,    inf)),
            GParameter('b',   'impact_parameter', 'R_s',    U(0.0,  1.0), (0,      1))]
        self.ps.add_global_block('orbit', porbit)

    def _init_p_planet(self):
        """Planet parameter initialisation.
        """
        pk2 = [PParameter('k2', 'area_ratio', 'A_s', GM(0.1), (0.01**2, 0.75**2))]
        self.ps.add_passband_block('k2', 1, 1, pk2)
        self._pid_k2 = repeat(self.ps.blocks[-1].start, self.npb)
        self._start_k2 = self.ps.blocks[-1].start
        self._sl_k2 = self.ps.blocks[-1].slice

    def _init_p_limb_darkening(self):
        """Limb darkening parameter initialisation.
        """
        pld = concatenate([
            [PParameter('q1_{:d}'.format(i), 'q1_coefficient', '', U(0, 1), bounds=(0, 1)),
             PParameter('q2_{:d}'.format(i), 'q2_coefficient', '', U(0, 1), bounds=(0, 1))]
            for i in range(self.npb)])
        self.ps.add_passband_block('ldc', 2, self.npb, pld)
        self._sl_ld = self.ps.blocks[-1].slice
        self._start_ld = self.ps.blocks[-1].start

    def _init_p_baseline(self):
        """Baseline parameter initialisation.
        """
        self._sl_bl = None

    def _init_p_noise(self):
        """Noise parameter initialisation.
        """
        pns = [LParameter('loge_{:d}'.format(i), 'log10_error_{:d}'.format(i), '', U(-4, 0), bounds=(-4, 0)) for i in range(self.n_noise_blocks)]
        self.ps.add_lightcurve_block('log_err', 1, self.n_noise_blocks, pns)
        self._sl_err = self.ps.blocks[-1].slice
        self._start_err = self.ps.blocks[-1].start

    def _init_instrument(self):
        pass

    def _pre_initialisation(self):
        pass

    def _post_initialisation(self):
        pass

    def create_pv_population(self, npop=50):
        pvp = self.ps.sample_from_prior(npop)
        for sl in self.ps.blocks[1].slices:
            pvp[:,sl] = uniform(0.01**2, 0.25**2, size=(npop, 1))

        # With LDTk
        # ---------
        #
        # Use LDTk to create the sample if LDTk has been initialised.
        #
        if self.ldps:
            istart = self._start_ld
            cms, ces = self.ldps.coeffs_tq()
            for i, (cm, ce) in enumerate(zip(cms.flat, ces.flat)):
                pvp[:, i + istart] = normal(cm, ce, size=pvp.shape[0])

        # No LDTk
        # -------
        #
        # Ensure that the total limb darkening decreases towards
        # red passbands.
        #
        else:
            ldsl = self._sl_ld
            for i in range(pvp.shape[0]):
                pid = argsort(pvp[i, ldsl][::2])[::-1]
                pvp[i, ldsl][::2] = pvp[i, ldsl][::2][pid]
                pvp[i, ldsl][1::2] = pvp[i, ldsl][1::2][pid]

        # Estimate white noise from the data
        # ----------------------------------
        for i in range(self.nlc):
            wn = diff(self.ofluxa).std() / sqrt(2)
            pvp[:, self._start_err] = log10(uniform(0.5*wn, 2*wn, size=npop))
        return pvp

    def baseline(self, pv):
        """Multiplicative baseline"""
        return 1.

    def trends(self, pv):
        """Additive trends"""
        return 0.

    def transit_model(self, pv, copy=True):
        pv = atleast_2d(pv)
        pvp = map_pv(pv)
        ldc = map_ldc(pv[:,self._sl_ld])
        flux = self.tm.evaluate_pv(pvp, ldc, copy)
        return flux

    def flux_model(self, pv):
        baseline    = self.baseline(pv)
        trends      = self.trends(pv)
        model_flux = self.transit_model(pv)
        return baseline * model_flux + trends

    def residuals(self, pv):
        return self.ofluxa - self.flux_model(pv)

    def set_prior(self, parameter, prior, *nargs) -> None:
        if isinstance(parameter, str):
            descriptions = self.ps.descriptions
            names = self.ps.names
            if parameter in descriptions:
                parameter = descriptions.index(parameter)
            elif parameter in names:
                parameter = names.index(parameter)
            else:
                params = ', '.join([f"{ln} ({sn})" for ln, sn in zip(self.ps.descriptions, self.ps.names)])
                raise ValueError(f'Parameter "{parameter}" not found from the parameter set: {params}')

        if isinstance(prior, str):
            if prior.lower() in ['n', 'np', 'normal']:
                prior = N(nargs[0], nargs[1])
            elif prior.lower() in ['u', 'up', 'uniform']:
                prior = U(nargs[0], nargs[1])
            else:
                raise ValueError(f'Unknown prior "{prior}". Allowed values are (N)ormal and (U)niform.')

        self.ps[parameter].prior = prior

    def set_radius_ratio_prior(self, kmin, kmax):
        for p in self.ps[self._sl_k2]:
            p.prior = U(kmin ** 2, kmax ** 2)
            p.bounds = [kmin ** 2, kmax ** 2]
        self.ps.thaw()
        self.ps.freeze()

    def add_t14_prior(self, mean: float, std: float) -> None:
        """Add a normal prior on the transit duration.

        Parameters
        ----------
        mean: float
            Mean of the normal distribution
        std: float
            Standard deviation of the normal distribution.
        """

        def T14(pv):
            pv = atleast_2d(pv)
            a = as_from_rhop(pv[:, 2], pv[:, 1])
            t14 = duration_eccentric(pv[:, 1], sqrt(pv[:, 4]), a, arccos(pv[:, 3] / a), 0, 0, 1)
            return norm.logpdf(t14, mean, std)

        self.lnpriors.append(T14)

    def add_as_prior(self, mean: float, std: float) -> None:
        """Add a normal prior on the scaled semi-major axis :math:`(a / R_\star)`.

        Parameters
        ----------
        mean: float
            Mean of the normal distribution.
        std: float
            Standard deviation of the normal distribution
        """
        def as_prior(pv):
            a = as_from_rhop(pv[2], pv[1])
            return norm.logpdf(a, mean, std)
        self.lnpriors.append(as_prior)

    def add_ldtk_prior(self, teff: tuple, logg: tuple, z: tuple, passbands: tuple,
                       uncertainty_multiplier: float = 3, **kwargs) -> None:
        """Add a LDTk-based prior on the limb darkening.

        Parameters
        ----------
        teff
        logg
        z
        passbands
        uncertainty_multiplier

        Returns
        -------

        """
        if 'pbs' in kwargs.keys():
            raise DeprecationWarning("The 'pbs' argument has been renamed to 'passbands'")

        if isinstance(passbands[0], str):
            raise DeprecationWarning(
                'Passing passbands by name has been deprecated, they should be now Filter instances.')

        self.ldsc = LDPSetCreator(teff, logg, z, list(passbands))
        self.ldps = self.ldsc.create_profiles(1000)
        self.ldps.resample_linear_z()
        self.ldps.set_uncertainty_multiplier(uncertainty_multiplier)

        def ldprior(pv):
            return self.ldps.lnlike_tq(pv[:, self._sl_ld].reshape([pv.shape[0], -1, 2]))

        self.lnpriors.append(ldprior)


    def remove_outliers(self, sigma=5):
        fmodel = squeeze(self.flux_model(self.de.minimum_location))
        covariates = [] if self.covariates is not None else None
        times, fluxes, lcids, errors = [], [], [], []
        for i in range(len(self.times)):
            res = self.fluxes[i] - fmodel[self.lcslices[i]]
            mask = ~sigma_clip(res, sigma=sigma).mask
            times.append(self.times[i][mask])
            fluxes.append(self.fluxes[i][mask])
            if covariates is not None:
                covariates.append(self.covariates[i][mask])
            if self.errors is not None:
                errors.append(self.errors[i][mask])

        # pass the outlier-masked covariate list built above, not the originals
        self._init_data(times=times, fluxes=fluxes, covariates=covariates, pbids=self.pbids,
                        errors=(errors if self.errors is not None else None), wnids=self.noise_ids,
                        nsamples=self.nsamples, exptimes=self.exptimes)


    def remove_transits(self, tids):
        m = ones(len(self.times), bool)
        m[tids] = False
        # times, fluxes, and covariates are lists, so mask them explicitly
        self._init_data([t for t, k in zip(self.times, m) if k],
                        [f for f, k in zip(self.fluxes, m) if k],
                        self.pbids[m],
                        [c for c, k in zip(self.covariates, m) if k] if self.covariates is not None else None,
                        self.errors[m], self.noise_ids[m], self.nsamples[m], self.exptimes[m])
        self._init_parameters()

    def lnprior(self, pv: ndarray) -> Union[Iterable,float]:
        """Log prior density for a 1D or 2D array of model parameters.

        Parameters
        ----------
        pv: ndarray
            Either a 1D parameter vector or a 2D parameter array.

        Returns
        -------
            Log prior density for the given parameter vector(s).
        """
        return self.ps.lnprior(pv) + self.additional_priors(pv)

    def additional_priors(self, pv):
        """Additional priors."""
        pv = atleast_2d(pv)
        return sum([f(pv) for f in self.lnpriors], 0)

    def lnlikelihood(self, pv):
        """Log likelihood for a 1D or 2D array of model parameters.

        Parameters
        ----------
        pv: ndarray
            Either a 1D parameter vector or a 2D parameter array.

        Returns
        -------
            Log likelihood for the given parameter vector(s).
        """
        flux_m = self.flux_model(pv)
        wn = 10**(atleast_2d(pv)[:,self._sl_err])
        return lnlike_normal_v(self.ofluxa, flux_m, wn, self.noise_ids, self.lcids)

    def lnposterior(self, pv):
        lnp = self.lnprior(pv) + self.lnlikelihood(pv)
        return where(isfinite(lnp), lnp, -inf)

    def __call__(self, pv):
        return self.lnposterior(pv)

    def optimize_global(self, niter=200, npop=50, population=None, label='Global optimisation', leave=False,
                        plot_convergence: bool = True, use_tqdm: bool = True):

        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop, maximize=True, vectorize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:,:] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave, disable=(not use_tqdm)):
            pass

        if plot_convergence:
            fig, axs = subplots(1, 5, figsize=(13, 2), constrained_layout=True)
            rfit = self.de._fitness
            mfit = isfinite(rfit)

            if hasattr(self, '_old_de_fitness'):
                m = isfinite(self._old_de_fitness)
                axs[0].hist(-self._old_de_fitness[m], facecolor='midnightblue', bins=25, alpha=0.25)
            axs[0].hist(-rfit[mfit], facecolor='midnightblue', bins=25)

            for i, ax in zip([0, 2, 3, 4], axs[1:]):
                if hasattr(self, '_old_de_fitness'):
                    m = isfinite(self._old_de_fitness)
                    ax.plot(self._old_de_population[m, i], -self._old_de_fitness[m], 'kx', alpha=0.25)
                ax.plot(self.de.population[mfit, i], -rfit[mfit], 'k.')
                ax.set_xlabel(self.ps.descriptions[i])
            setp(axs, yticks=[])
            setp(axs[1], ylabel='Log posterior')
            setp(axs[0], xlabel='Log posterior')
            sb.despine(fig, offset=5)
        self._old_de_population = self.de.population.copy()
        self._old_de_fitness = self.de._fitness.copy()

    def optimize_local(self, pv0=None, method='powell'):
        if pv0 is None:
            if self.de is not None:
                pv0 = self.de.minimum_location
            else:
                pv0 = self.ps.mean_pv
                pv0[self._sl_err] = log10(self.wn)
        res = minimize(lambda pv: -self.lnposterior(pv), pv0, method=method)
        self._local_minimization = res

    def sample_mcmc(self, niter: int = 500, thin: int = 5, repeats: int = 1, npop: int = None, population=None,
                    label='MCMC sampling', reset=True, leave=True, save=False, use_tqdm: bool = True):

        if save and self.result_dir is None:
            raise ValueError('The MCMC sampler is set to save the results, but the result directory is not set.')

        if self.sampler is None:
            if population is not None:
                pop0 = population
            elif hasattr(self, '_local_minimization') and self._local_minimization is not None:
                pop0 = multivariate_normal(self._local_minimization.x, diag(full(len(self.ps), 0.001 ** 2)), size=npop)
            elif self.de is not None:
                pop0 = self.de.population.copy()
            else:
                raise ValueError('Sample MCMC needs an initial population.')
            self.sampler = EnsembleSampler(pop0.shape[0], pop0.shape[1], self.lnposterior, vectorize=True)
        else:
            pop0 = self.sampler.chain[:,-1,:].copy()

        for i in tqdm(range(repeats), desc='MCMC sampling', disable=(not use_tqdm)):
            if reset or i > 0:
                self.sampler.reset()
            for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter,
                          desc='Run {:d}/{:d}'.format(i+1, repeats), leave=False, disable=(not use_tqdm)):
                pass
            if save:
                self.save(self.result_dir)
            pop0 = self.sampler.chain[:,-1,:].copy()

    def posterior_samples(self, burn: int = 0, thin: int = 1, derived_parameters: bool = True):
        fc = self.sampler.chain[:, burn::thin, :].reshape([-1, self.de.n_par])
        df = pd.DataFrame(fc, columns=self.ps.names)
        if derived_parameters:
            for k2c in df.columns[self._sl_k2]:
                df[k2c.replace('k2', 'k')] = sqrt(df[k2c])
            df['a'] = as_from_rhop(df.rho.values, df.p.values)
            df['inc'] = i_from_baew(df.b.values, df.a.values, 0., 0.)

            average_ks = sqrt(df.iloc[:, self._sl_k2]).mean(1).values
            df['t14'] = d_from_pkaiews(df.p.values, average_ks, df.a.values, df.inc.values, 0., 0., 1)
        return df

    def plot_mcmc_chains(self, pid: int=0, alpha: float=0.1, thin: int=1, ax=None):
        fig, ax = (None, ax) if ax is not None else subplots()
        ax.plot(self.sampler.chain[:, ::thin, pid].T, 'k', alpha=alpha)
        if fig is not None:
            fig.tight_layout()
        return fig

    def save(self, save_path: Path = '.'):
        save_path = Path(save_path)

        if self.de:
            de = xa.DataArray(self.de.population, dims='pvector name'.split(), coords={'name': self.ps.names})
        else:
            de = None

        if self.sampler is not None:
            mc = xa.DataArray(self.sampler.chain, dims='pvector step name'.split(),
                              coords={'name': self.ps.names}, attrs={'ndim': self.de.n_par, 'npop': self.de.n_pop})
        else:
            mc = None

        ds = xa.Dataset(data_vars={'de_population_lm': de, 'lm_mcmc': mc},
                        attrs={'created': strftime('%Y-%m-%d %H:%M:%S'), 'target': self.name})
        ds.to_netcdf(save_path.joinpath(f'{self.name}.nc'))

    def plot_light_curves(self, method='de', ncol: int = 3, width: float = 2., max_samples: int = 1000, figsize=None,
                          data_alpha=0.5, ylim=None):
        nrow = int(ceil(self.nlc / ncol))
        if method == 'mcmc':
            df = self.posterior_samples(derived_parameters=False)
            t0, p = df.tc.median(), df.p.median()
            fmodel = self.flux_model(permutation(df.values)[:max_samples])
            fmperc = percentile(fmodel, [50, 16, 84, 2.5, 97.5], 0)
        else:
            fmodel = squeeze(self.flux_model(self.de.minimum_location))
            t0, p = self.de.minimum_location[0], self.de.minimum_location[1]
            fmperc = None

        fig, axs = subplots(nrow, ncol, figsize=figsize, constrained_layout=True, sharey='all', sharex='all',
                            squeeze=False)
        for i in range(self.nlc):
            ax = axs.flat[i]
            e = epoch(self.times[i].mean(), t0, p)
            tc = t0 + e * p
            time = self.times[i] - tc

            ax.plot(time, self.fluxes[i], '.', alpha=data_alpha)

            if method == 'de':
                ax.plot(time, fmodel[self.lcslices[i]], 'w', lw=4)
                ax.plot(time, fmodel[self.lcslices[i]], 'k', lw=1)
            else:
                ax.fill_between(time, *fmperc[3:5, self.lcslices[i]], alpha=0.15)
                ax.fill_between(time, *fmperc[1:3, self.lcslices[i]], alpha=0.25)
                ax.plot(time, fmperc[0, self.lcslices[i]])

            setp(ax, xlabel=f'Time - T$_c$ [d]', xlim=(-width / 2 / 24, width / 2 / 24))
        setp(axs[:, 0], ylabel='Normalised flux')

        if ylim is not None:
            setp(axs, ylim=ylim)

        for ax in axs.flat[self.nlc:]:
            ax.remove()
        return fig

    def __repr__(self):
        return f"Target: {self.name}\nLPF: {self._lpf_name}\n Passbands: {self.passbands}"
Example #10
def model_galaxy_mcmc(model_file,
                      output_name=None,
                      write_fits=default_filetypes,
                      iterations=0,
                      burn=0,
                      chains=None,
                      max_iterations=1,
                      convergence_check=check_convergence_autocorr):
    """
    Model the surface brightness distribution of a galaxy or galaxies using
    multi-component Markov Chain Monte Carlo parameter estimation.

    :param model_file: Filename of the model definition file. This should be
        a series of components from psfMC.ModelComponents, with parameters
        supplied as either fixed values or stochastics from psfMC.distributions
    :param output_name: Base name for output files (no file extension). By
        default, files are written out containing the requested image types
        (write_fits param) and the MCMC trace database. If None, use
        out_<model_filename>
    :param write_fits: List of which fits file types to write. By default, raw
        (unconvolved) model, convolved model, model IVM, residual, and point
        sources subtracted.
    :param iterations: Number of retained MCMC samples
    :param burn: Number of discarded (burn-in) MCMC samples
    :param chains: Number of individual chains (walkers) to run. If None, the
        minimum number recommended by emcee will be used. More is better.
    :param max_iterations: Maximum sampler iterations before convergence is
        enforced. Default is 1, which means the sampler halts even if not converged.
    :param convergence_check: Function taking an emcee Sampler as argument, and
        returning True or False based on whether the sampler has converged.
        Default function returns True when the autocorrelation time of all
        stochastic variables is < 10% of the total number of samples. Sampling
        will be repeated (increasing the chain length) until convergence check
        is met or until max_iterations iterations are performed.
    """
    if output_name is None:
        output_name = 'out_' + model_file.replace('.py', '')
    output_name += '_{}'

    mc_model = MultiComponentModel(components=model_file)

    # If chains is not specified, use the minimum number recommended by emcee
    if chains is None:
        chains = 2 * mc_model.num_params + 2

    # FIXME: can't use threads=n right now because model object can't be pickled
    sampler = EnsembleSampler(nwalkers=chains,
                              dim=mc_model.num_params,
                              lnpostfn=mc_model.log_posterior,
                              kwargs={'model': mc_model})

    # Open database if it exists, otherwise pass backend to create a new one
    db_name = output_name.format('db') + '.fits'

    # TODO: Resume if database exists
    if not os.path.exists(db_name):
        param_vec = mc_model.init_params_from_priors(chains)

        # Run burn-in and discard
        for step, result in enumerate(
                sampler.sample(param_vec, iterations=burn)):
            # Set new initial sampler state
            param_vec = result[0]
            # No need to retain images from every step, so clear blobs
            sampler.clear_blobs()
            print_progress(step, burn, 'Burning')

        sampler.reset()

        converged = False
        for sampling_iter in range(max_iterations):
            # Now run real samples and retain
            for step, result in enumerate(
                    sampler.sample(param_vec, iterations=iterations)):
                mc_model.accumulate_images(result[3])
                # No need to retain images from every step, so clear blobs
                sampler.clear_blobs()
                print_progress(step, iterations, 'Sampling')

            if convergence_check(sampler):
                converged = True
                break
            else:
                warn('Not yet converged after {:d} iterations:'.format(
                    (sampling_iter + 1) * iterations))
                convergence_check(sampler, verbose=1)

        # Collect some metadata about the sampling process. These will be saved
        # in the FITS headers of both the output database and the images
        db_metadata = OrderedDict([('MCITER', sampler.chain.shape[1]),
                                   ('MCBURN', burn), ('MCCHAINS', chains),
                                   ('MCCONVRG', converged),
                                   ('MCACCEPT',
                                    sampler.acceptance_fraction.mean())])
        database = save_database(sampler,
                                 mc_model,
                                 db_name,
                                 meta_dict=db_metadata)
    else:
        print('Database already contains sampled chains, skipping sampling')
        database = load_database(db_name)

    # Write model output files
    save_posterior_images(mc_model,
                          database,
                          output_name=output_name,
                          filetypes=write_fits)
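For reference, a minimal sketch in the spirit of the default convergence check described in the docstring (an illustrative stand-in, not psfMC's actual `check_convergence_autocorr`; it assumes emcee 3's `get_autocorr_time` and `get_chain`):

import numpy as np

def check_convergence_autocorr(sampler, verbose=0):
    # Converged when every parameter's integrated autocorrelation time
    # is below 10% of the number of retained steps.
    n_steps = sampler.get_chain().shape[0]       # steps axis in emcee 3
    tau = sampler.get_autocorr_time(quiet=True)  # warn instead of raising
    if verbose:
        print('autocorr times:', tau, 'steps:', n_steps)
    return bool(np.all(tau < 0.1 * n_steps))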
Example #11
#print("\nInitializing walkers")
nwalk = 100
params0       = np.tile(guess_list, nwalk).reshape(nwalk, len(guess_list))
#
# perturb walkers around guess
#
for i in range(len(guess_list)):
    params0.T[i] += np.random.rand(nwalk) * perturb_list[i]

# hack!
params0.T[2]  = np.absolute(params0.T[2])        # ...and force >= 0

#print("\nInitializing the sampler and burning in walkers")
s = EnsembleSampler(nwalk, params0.shape[-1], bre, threads=4)
pos, prob, state = s.run_mcmc(params0, burn_in)
s.reset()

#print("\nSampling the posterior density for the problem")
s.run_mcmc(pos, samples)

samplea = s.flatchain[:,0]
pylab.plot(samplea)
pylab.xlabel('Step number')
pylab.ylabel('alpha')
pylab.savefig('alpha.png')  # save before show(), which clears the figure
pylab.show()

samples = s.flatchain[:, 1]
pylab.figure()  # new figure for the sigma trace
pylab.plot(samples)
pylab.xlabel('Step number')
pylab.ylabel('sigma')
Example #12
# initial guesses for the walkers starting locations
guess_q = 1.16389876649
guess_c = 0  # No reason to think c is positive or negative
guess_p = 6  # p is between 1 and 10

params0 = np.tile([guess_q, guess_c, guess_p], nwalk).reshape(nwalk, 3)
params0.T[0] += np.random.rand(nwalk) * 0.025  # Perturb q
params0.T[1] += np.random.rand(nwalk) * 0.1  # Perturb C
params0.T[2] += np.random.rand(nwalk) * 1.5  # Perturb p...
params0.T[2] = np.absolute(params0.T[2])  # ...and force >= 0

print("\nInitializing the sampler and burning in walkers")
s = EnsembleSampler(nwalk, params0.shape[-1], bre, threads=4)
pos, prob, state = s.run_mcmc(params0, 5000)
s.reset()

print("\nSampling the posterior density for the problem")
s.run_mcmc(pos, 10000)
print("Mean acceptance fraction was %.3f" % s.acceptance_fraction.mean())

#
# 1d Marginals
#
print("\nDetails for posterior one-dimensional marginals:")


def textual_boxplot(label, unordered, header):
    n, d = np.size(unordered), np.sort(unordered)
    if (header):
        print((10 * " %15s") % ("", "min", "P5", "P25", "P50", "P75", "P95",
Example #13
class LogPosteriorFunction:
    _lpf_name = 'LogPosteriorFunction'

    def __init__(self, name: str, result_dir: Union[Path, str] = '.'):
        """The Log Posterior Function class.

        Parameters
        ----------
        name: str
            Name of the log posterior function instance.
        """
        self.name = name
        self.result_dir = Path(result_dir if result_dir is not None else '.')

        # Declare high-level objects
        # --------------------------
        self.ps = None  # Parametrisation
        self.de = None  # Differential evolution optimiser
        self.sampler = None  # MCMC sampler
        self._local_minimization = None

        # Initialise the additional lnprior list
        # --------------------------------------
        self._additional_log_priors = []

        self._old_de_fitness = None
        self._old_de_population = None

    def print_parameters(self, columns: int = 2):
        columns = max(1, columns)
        for i, p in enumerate(self.ps):
            print(p.__repr__(), end=('\n' if i % columns == columns - 1 else '\t'))

    def _init_parameters(self):
        self.ps = ParameterSet()
        self.ps.freeze()

    def create_pv_population(self, npop=50):
        return self.ps.sample_from_prior(npop)

    def set_prior(self, parameter, prior, *nargs) -> None:
        if isinstance(parameter, str):
            descriptions = self.ps.descriptions
            names = self.ps.names
            if parameter in descriptions:
                parameter = descriptions.index(parameter)
            elif parameter in names:
                parameter = names.index(parameter)
            else:
                params = ', '.join([f"{ln} ({sn})" for ln, sn in zip(self.ps.descriptions, self.ps.names)])
                raise ValueError(f'Parameter "{parameter}" not found from the parameter set: {params}')

        if isinstance(prior, str):
            if prior.lower() in ['n', 'np', 'normal']:
                prior = NP(nargs[0], nargs[1])
            elif prior.lower() in ['u', 'up', 'uniform']:
                prior = UP(nargs[0], nargs[1])
            else:
                raise ValueError(f'Unknown prior "{prior}". Allowed values are (N)ormal and (U)niform.')

        self.ps[parameter].prior = prior

    def lnprior(self, pv: ndarray) -> Union[Iterable, float]:
        """Log prior density for a 1D or 2D array of model parameters.

        Parameters
        ----------
        pv: ndarray
            Either a 1D parameter vector or a 2D parameter array.

        Returns
        -------
            Log prior density for the given parameter vector(s).
        """
        return self.ps.lnprior(pv) + self.additional_priors(pv)

    def additional_priors(self, pv):
        pv = atleast_2d(pv)
        return sum([f(pv) for f in self._additional_log_priors], 0)

    def lnlikelihood(self, pv):
        raise NotImplementedError

    def lnposterior(self, pv):
        lnp = self.lnprior(pv) + self.lnlikelihood(pv)
        return where(isfinite(lnp), lnp, -inf)

    def __call__(self, pv):
        return self.lnposterior(pv)

    def optimize_local(self, pv0=None, method='powell'):
        if pv0 is None:
            if self.de is not None:
                pv0 = self.de.minimum_location
            else:
                pv0 = self.ps.mean_pv
        res = minimize(lambda pv: -self.lnposterior(pv), pv0, method=method)
        self._local_minimization = res

    def optimize_global(self, niter=200, npop=50, population=None, pool=None, lnpost=None, vectorize=True,
                        label='Global optimisation', leave=False, plot_convergence: bool = True, use_tqdm: bool = True,
                        plot_parameters: tuple = (0, 2, 3, 4)):

        lnpost = lnpost or self.lnposterior
        if self.de is None:
            self.de = DiffEvol(lnpost, clip(self.ps.bounds, -1, 1), npop, maximize=True, vectorize=vectorize, pool=pool)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave, disable=(not use_tqdm)):
            pass

        if plot_convergence:
            fig, axs = subplots(1, 1 + len(plot_parameters), figsize=(13, 2), constrained_layout=True)
            rfit = self.de._fitness
            mfit = isfinite(rfit)

            if self._old_de_fitness is not None:
                m = isfinite(self._old_de_fitness)
                axs[0].hist(-self._old_de_fitness[m], facecolor='midnightblue', bins=25, alpha=0.25)
            axs[0].hist(-rfit[mfit], facecolor='midnightblue', bins=25)

            for i, ax in zip(plot_parameters, axs[1:]):
                if self._old_de_fitness is not None:
                    m = isfinite(self._old_de_fitness)
                    ax.plot(self._old_de_population[m, i], -self._old_de_fitness[m], 'kx', alpha=0.25)
                ax.plot(self.de.population[mfit, i], -rfit[mfit], 'k.')
                ax.set_xlabel(self.ps.descriptions[i])
            setp(axs, yticks=[])
            setp(axs[1], ylabel='Log posterior')
            setp(axs[0], xlabel='Log posterior')
            sb.despine(fig, offset=5)
        self._old_de_population = self.de.population.copy()
        self._old_de_fitness = self.de._fitness.copy()

    def sample_mcmc(self, niter: int = 500, thin: int = 5, repeats: int = 1, npop: int = None, population=None,
                    label='MCMC sampling', reset=True, leave=True, save=False, use_tqdm: bool = True, pool=None,
                    lnpost=None, vectorize: bool = True):

        if save and self.result_dir is None:
            raise ValueError('The MCMC sampler is set to save the results, but the result directory is not set.')

        lnpost = lnpost or self.lnposterior
        if self.sampler is None:
            if population is not None:
                pop0 = population
            elif hasattr(self, '_local_minimization') and self._local_minimization is not None:
                pop0 = multivariate_normal(self._local_minimization.x, diag(full(len(self.ps), 0.001 ** 2)), size=npop)
            elif self.de is not None:
                pop0 = self.de.population.copy()
            else:
                raise ValueError('Sample MCMC needs an initial population.')
            self.sampler = EnsembleSampler(pop0.shape[0], pop0.shape[1], lnpost, vectorize=vectorize, pool=pool)
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()

        for i in tqdm(range(repeats), desc=label, disable=(not use_tqdm), leave=leave):
            if reset or i > 0:
                self.sampler.reset()
            for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter,
                          desc='Run {:d}/{:d}'.format(i + 1, repeats), leave=False, disable=(not use_tqdm)):
                pass
            if save:
                self.save(self.result_dir)
            pop0 = self.sampler.chain[:, -1, :].copy()

    def posterior_samples(self, burn: int = 0, thin: int = 1):
        fc = self.sampler.chain[:, burn::thin, :].reshape([-1, len(self.ps)])
        df = pd.DataFrame(fc, columns=self.ps.names)
        return df

    def plot_mcmc_chains(self, pid: int = 0, alpha: float = 0.1, thin: int = 1, ax=None):
        fig, ax = (None, ax) if ax is not None else subplots()
        ax.plot(self.sampler.chain[:, ::thin, pid].T, 'k', alpha=alpha)
        if fig is not None:
            fig.tight_layout()
        return fig

    def save(self, save_path: Path = '.'):
        save_path = Path(save_path)
        npar = len(self.ps)

        if self.de:
            de = xa.DataArray(self.de.population, dims='pvector parameter'.split(), coords={'parameter': self.ps.names})
        else:
            de = None

        if self.sampler is not None:
            mc = xa.DataArray(self.sampler.chain, dims='pvector step parameter'.split(),
                              coords={'parameter': self.ps.names}, attrs={'ndim': npar, 'npop': self.sampler.nwalkers})
        else:
            mc = None

        ds = xa.Dataset(data_vars={'de_population': de, 'mcmc_samples': mc},
                        attrs={'created': strftime('%Y-%m-%d %H:%M:%S'), 'name': self.name})
        ds.to_netcdf(save_path.joinpath(f'{self.name}.nc'))

        try:
            if self.sampler is not None:
                fname = save_path / f'{self.name}.fits'
                chains = self.sampler.chain
                nchains = chains.shape[0]
                nsteps = chains.shape[1]
                idch = repeat(arange(nchains), nsteps)
                idst = tile(arange(nsteps), nchains)
                flc = chains.reshape([-1, chains.shape[2]])
                tb1 = Table([idch, idst], names=['chain', 'step'])
                tb1.add_columns(flc.T, names=self.ps.names)
                tb2 = Table([idch, idst], names=['chain', 'step'])
                tb2.add_column(self.sampler.lnprobability.ravel(), name='lnp')
                tbhdu1 = pf.BinTableHDU(tb1, name='posterior')
                tbhdu2 = pf.BinTableHDU(tb2, name='sample_stats')
                hdul = pf.HDUList([pf.PrimaryHDU(), tbhdu1, tbhdu2])
                hdul.writeto(fname, overwrite=True)
        except ValueError:
            print('Could not save the samples in fits format.')

    def __repr__(self):
        return f"Target: {self.name}\nLPF: {self._lpf_name}"
Example #15
0
class BaseLPF:
    _lpf_name = 'base'

    def __init__(self, name: str, passbands: list, times: list = None, fluxes: list = None, errors: list = None,
                 pbids: list = None, covariates: list = None, tm: TransitModel = None,
                 nsamples: tuple = 1, exptimes: tuple = 0.):
        self.tm = tm or QuadraticModel(klims=(0.01, 0.75), nk=512, nz=512)

        # LPF name
        # --------
        self.name = name

        # Passbands
        # ---------
        # Should be arranged from blue to red
        if isinstance(passbands, (list, tuple, ndarray)):
            self.passbands = passbands
        else:
            self.passbands = [passbands]
        self.npb = npb = len(self.passbands)

        self.nsamples = None
        self.exptimes = None

        # Declare high-level objects
        # --------------------------
        self.ps = None          # Parametrisation
        self.de = None          # Differential evolution optimiser
        self.sampler = None     # MCMC sampler
        self.instrument = None  # Instrument
        self.ldsc = None        # Limb darkening set creator
        self.ldps = None        # Limb darkening profile set
        self.cntm = None        # Contamination model

        # Declare data arrays and variables
        # ---------------------------------
        self.nlc: int = 0                # Number of light curves
        self.times: list = None          # List of time arrays
        self.fluxes: list = None         # List of flux arrays
        self.errors: list = None         # List of flux uncertainties
        self.covariates: list = None     # List of covariates
        self.wn: ndarray = None          # Array of white noise estimates for each light curve
        self.timea: ndarray = None       # Array of concatenated times
        self.mfluxa: ndarray = None      # Array of concatenated model fluxes
        self.ofluxa: ndarray = None      # Array of concatenated observed fluxes
        self.errora: ndarray = None      # Array of concatenated flux uncertainties

        self.lcids: ndarray = None       # Array of light curve indices for each datapoint
        self.pbids: ndarray = None       # Array of passband indices for each light curve
        self.lcslices: list = None       # List of light curve slices

        # Set up the observation data
        # ---------------------------
        if times is not None and fluxes is not None and pbids is not None:
            self._init_data(times, fluxes, pbids, covariates, errors, nsamples, exptimes)

        # Setup parametrisation
        # =====================
        self._init_parameters()

        # Initialise the additional lnprior list
        # --------------------------------------
        self.lnpriors = []

        # Initialise the temporary arrays
        # -------------------------------
        self._zpv = zeros(6)
        self._tuv = zeros((npb, 2))
        self._zeros = zeros(npb)
        self._ones = ones(npb)

        # Inititalise the instrument
        self._init_instrument()

        if times is not None:
            self._bad_fluxes = [ones_like(t) for t in self.times]
        else:
            self._bad_fluxes = None


    def _init_data(self, times, fluxes, pbids, covariates=None, errors=None, nsamples=1, exptimes=0.):

        if isinstance(times, ndarray) and times.dtype == float:
            times = [times]

        if isinstance(fluxes, ndarray) and fluxes.dtype == float:
            fluxes = [fluxes]

        self.nlc = len(times)
        self.times = asarray(times)
        self.fluxes = asarray(fluxes)
        self.pbids = asarray(pbids)
        self.wn = [diff(f).std() / sqrt(2) for f in fluxes]
        self.timea = concatenate(self.times)
        self.ofluxa = concatenate(self.fluxes)
        self.mfluxa = zeros_like(self.ofluxa)
        self.pbids = atleast_1d(pbids).astype('int')
        self.lcids = concatenate([full(t.size, i) for i, t in enumerate(self.times)])

        if isscalar(nsamples):
            self.nsamples = full(self.nlc, nsamples)
            self.exptimes = full(self.nlc, exptimes)
        else:
            assert (len(nsamples) == self.nlc) and (len(exptimes) == self.nlc)
            self.nsamples = asarray(nsamples, 'int')
            self.exptimes = asarray(exptimes)

        self.tm.set_data(self.timea, self.lcids, self.pbids, self.nsamples, self.exptimes)

        if errors is None:
            self.errors = array([full(t.size, nan) for t in self.times])
        else:
            self.errors = asarray(errors)
        self.errora = concatenate(self.errors)

        # Initialise the light curves slices
        # ----------------------------------
        self.lcslices = []
        sstart = 0
        for i in range(self.nlc):
            s = self.times[i].size
            self.lcslices.append(s_[sstart:sstart + s])
            sstart += s

        # Initialise the covariate arrays, if given
        # -----------------------------------------
        if covariates is not None:
            self.covariates = covariates
            for cv in self.covariates:
                cv[:, 1:] = (cv[:, 1:] - cv[:, 1:].mean(0)) / cv[:, 1:].ptp(0)
            self.ncovs = self.covariates[0].shape[1]
            self.covsize = array([c.size for c in self.covariates])
            self.covstart = concatenate([[0], self.covsize.cumsum()[:-1]])
            self.cova = concatenate(self.covariates)

    def _init_parameters(self):
        self.ps = ParameterSet()
        self._init_p_orbit()
        self._init_p_planet()
        self._init_p_limb_darkening()
        self._init_p_baseline()
        self._init_p_noise()
        self.ps.freeze()

    def _init_p_orbit(self):
        """Orbit parameter initialisation.
        """
        porbit = [
            GParameter('tc',  'zero_epoch',       'd',      N(0.0,  0.1), (-inf, inf)),
            GParameter('pr',  'period',           'd',      N(1.0, 1e-5), (0,    inf)),
            GParameter('rho', 'stellar_density',  'g/cm^3', U(0.1, 25.0), (0,    inf)),
            GParameter('b',   'impact_parameter', 'R_s',    U(0.0,  1.0), (0,      1))]
        self.ps.add_global_block('orbit', porbit)

    def _init_p_planet(self):
        """Planet parameter initialisation.
        """
        pk2 = [PParameter('k2', 'area_ratio', 'A_s', GM(0.1), (0.01**2, 0.55**2))]
        self.ps.add_passband_block('k2', 1, 1, pk2)
        self._pid_k2 = repeat(self.ps.blocks[-1].start, self.npb)
        self._start_k2 = self.ps.blocks[-1].start
        self._sl_k2 = self.ps.blocks[-1].slice

    def _init_p_limb_darkening(self):
        """Limb darkening parameter initialisation.
        """
        pld = concatenate([
            [PParameter('q1_{:d}'.format(i), 'q1_coefficient', '', U(0, 1), bounds=(0, 1)),
             PParameter('q2_{:d}'.format(i), 'q2_coefficient', '', U(0, 1), bounds=(0, 1))]
            for i in range(self.npb)])
        self.ps.add_passband_block('ldc', 2, self.npb, pld)
        self._sl_ld = self.ps.blocks[-1].slice
        self._start_ld = self.ps.blocks[-1].start

    def _init_p_baseline(self):
        """Baseline parameter initialisation.
        """
        pass

    def _init_p_noise(self):
        """Noise parameter initialisation.
        """
        pns = [LParameter('lne_{:d}'.format(i), 'log_error_{:d}'.format(i), '', U(-8, -0), bounds=(-8, -0)) for i in range(self.nlc)]
        self.ps.add_lightcurve_block('log_err', 1, self.nlc, pns)
        self._sl_err = self.ps.blocks[-1].slice
        self._start_err = self.ps.blocks[-1].start

    def _init_instrument(self):
        pass

    def create_pv_population(self, npop=50):
        pvp = self.ps.sample_from_prior(npop)
        for sl in self.ps.blocks[1].slices:
            pvp[:,sl] = uniform(0.01**2, 0.25**2, size=(npop, 1))

        # With LDTk
        # ---------
        #
        # Use LDTk to create the sample if LDTk has been initialised.
        #
        if self.ldps:
            istart = self._start_ld
            cms, ces = self.ldps.coeffs_tq()
            for i, (cm, ce) in enumerate(zip(cms.flat, ces.flat)):
                pvp[:, i + istart] = normal(cm, ce, size=pvp.shape[0])

        # No LDTk
        # -------
        #
        # Ensure that the total limb darkening decreases towards
        # red passbands.
        #
        else:
            ldsl = self._sl_ld
            for i in range(pvp.shape[0]):
                pid = argsort(pvp[i, ldsl][::2])[::-1]
                pvp[i, ldsl][::2] = pvp[i, ldsl][::2][pid]
                pvp[i, ldsl][1::2] = pvp[i, ldsl][1::2][pid]

        # Estimate white noise from the data
        # ----------------------------------
        for i in range(self.nlc):
            wn = diff(self.fluxes[i]).std() / sqrt(2)
            pvp[:, self._start_err + i] = log10(uniform(0.5 * wn, 2 * wn, size=npop))
        return pvp

    def baseline(self, pv):
        """Multiplicative baseline"""
        return 1.

    def trends(self, pv):
        """Additive trends"""
        return 0.

    def transit_model(self, pv):
        pv = atleast_2d(pv)
        pvp = map_pv(pv)
        ldc = map_ldc(pv[:,self._sl_ld])
        flux = self.tm.evaluate_pv(pvp, ldc)
        return flux

    def flux_model(self, pv):
        baseline    = self.baseline(pv)
        trends      = self.trends(pv)
        model_flux = self.transit_model(pv)
        return baseline * model_flux + trends

    def residuals(self, pv):
        return self.ofluxa - self.flux_model(pv)

    def set_prior(self, pid: int, prior) -> None:
        self.ps[pid].prior = prior

    def add_t14_prior(self, mean: float, std: float) -> None:
        """Add a normal prior on the transit duration.

        Parameters
        ----------
        mean
            Mean of the normal prior on the transit duration [d].
        std
            Standard deviation of the normal prior [d].
        """
        def T14(pv):
            a = as_from_rhop(pv[2], pv[1])
            t14 = duration_eccentric(pv[1], sqrt(pv[4]), a, mt.acos(pv[3] / a), 0, 0, 1)
            return norm.logpdf(t14, mean, std)
        self.lnpriors.append(T14)

    def add_as_prior(self, mean: float, std: float) -> None:
        """Add a normal prior on the scaled semi-major axis.

        Parameters
        ----------
        mean
            Mean of the normal prior on a/R_star.
        std
            Standard deviation of the normal prior.
        """
        def as_prior(pv):
            a = as_from_rhop(pv[2], pv[1])
            return norm.logpdf(a, mean, std)
        self.lnpriors.append(as_prior)

    def add_ldtk_prior(self, teff: tuple, logg: tuple, z: tuple,
                       uncertainty_multiplier: float = 3,
                       pbs: tuple = ('g', 'r', 'i', 'z')) -> None:
        """Add an LDTk-based prior on the limb darkening.

        Parameters
        ----------
        teff
            Stellar effective temperature as (value, uncertainty) [K].
        logg
            Stellar surface gravity as (value, uncertainty).
        z
            Stellar metallicity as (value, uncertainty).
        uncertainty_multiplier
            Factor used to inflate the LDTk limb darkening uncertainties.
        pbs
            Passband names, a subset of ('g', 'r', 'i', 'z').
        """
        fs = {n: f for n, f in zip('g r i z'.split(), (sdss_g, sdss_r, sdss_i, sdss_z))}
        filters = [fs[k] for k in pbs]
        self.ldsc = LDPSetCreator(teff, logg, z, filters)
        self.ldps = self.ldsc.create_profiles(1000)
        self.ldps.resample_linear_z()
        self.ldps.set_uncertainty_multiplier(uncertainty_multiplier)
        def ldprior(pv):
            return self.ldps.lnlike_tq(pv[self._sl_ld])
        self.lnpriors.append(ldprior)

    def remove_outliers(self, sigma=5):
        fmodel = self.flux_model(self.de.minimum_location).ravel()
        times, fluxes, errors = [], [], []
        for i in range(len(self.times)):
            res = self.fluxes[i] - fmodel[self.lcslices[i]]
            mask = ~sigma_clip(res, sigma=sigma).mask
            times.append(self.times[i][mask])
            fluxes.append(self.fluxes[i][mask])
            if self.errors is not None:
                errors.append(self.errors[i][mask])
        self._init_data(times, fluxes, self.pbids, errors=(errors if self.errors is not None else None))

    def remove_transits(self, tids):
        m = ones(len(self.times), bool)
        m[tids] = False
        self._init_data(self.times[m], self.fluxes[m], self.pbids[m],
                        self.covariates[m] if self.covariates is not None else None,
                        self.errors[m], self.nsamples[m], self.exptimes[m])
        self._init_parameters()

    def lnprior(self, pv):
        return self.ps.lnprior(pv) + self.additional_priors(pv)

    def additional_priors(self, pv):
        """Additional priors."""
        pv = atleast_2d(pv)
        return sum([f(pv) for f in self.lnpriors], 0)

    def lnlikelihood(self, pv):
        flux_m = self.flux_model(pv)
        wn = 10**(atleast_2d(pv)[:,self._sl_err])
        return lnlike_normal_v(self.ofluxa, flux_m, wn, self.lcids)

    def lnposterior(self, pv):
        lnp = self.lnprior(pv) + self.lnlikelihood(pv)
        return where(isfinite(lnp), lnp, -inf)

    def __call__(self, pv):
        return self.lnposterior(pv)

    def optimize_global(self, niter=200, npop=50, population=None, label='Global optimisation', leave=False):
        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop, maximize=True, vectorize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:,:] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave):
            pass

    def sample_mcmc(self, niter=500, thin=5, label='MCMC sampling', reset=False, leave=True):
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par, self.lnposterior, vectorize=True)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:,-1,:].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter, desc=label, leave=False):
            pass

    def posterior_samples(self, burn: int=0, thin: int=1, include_ldc: bool=False):
        ldstart = self._sl_ld.start
        fc = self.sampler.chain[:, burn::thin, :].reshape([-1, self.de.n_par])
        d = fc if include_ldc else fc[:, :ldstart]
        n = self.ps.names if include_ldc else self.ps.names[:ldstart]
        return pd.DataFrame(d, columns=n)

    def plot_mcmc_chains(self, pid: int=0, alpha: float=0.1, thin: int=1, ax=None):
        fig, ax = (None, ax) if ax is not None else subplots()
        ax.plot(self.sampler.chain[:, ::thin, pid].T, 'k', alpha=alpha)
        if fig is not None:
            fig.tight_layout()
        return fig


    def __repr__(self):
        s  = f"""Target: {self.name}
  LPF: {self._lpf_name}
  Passbands: {self.passbands}"""
        return s
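# Usage sketch for BaseLPF (the single synthetic light curve below is an
# illustrative assumption, not from the source):
#
#     import numpy as np
#     times  = [np.linspace(0.95, 1.07, 500)]
#     fluxes = [np.random.normal(1.0, 1e-3, 500)]
#     lpf = BaseLPF('toy', ['i'], times=times, fluxes=fluxes, pbids=[0])
#     lpf.optimize_global(niter=100, npop=40)   # seed with differential evolution
#     lpf.sample_mcmc(niter=500, thin=5)        # start emcee from the DE population
#     df = lpf.posterior_samples(burn=100)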
Example #16
0
class Sampler:
    """
    Wrapper of emcee.EnsembleSampler.
    """
    def __init__(self, lnpost, p0, keys, nwalkers=120):
        self.lnpost = lnpost
        self.sampler = EnsembleSampler(nwalkers,
                                       p0.shape[1],
                                       lnpost,
                                       threads=15)
        self.p0 = p0
        self.p = p0
        self.keys = keys
        self.ndim = len(keys)

    def reset_sampler(self):
        self.sampler.reset()

    def sample(self, n_sample, burnin=False):
        """
        execute mcmc for given iteration steps.
        """
        desc = "burnin" if burnin else "sample"
        iteration = tqdm(self.sampler.sample(self.p, iterations=n_sample),
                         total=n_sample,
                         desc=desc)
        for _ret in iteration:
            self.p = _ret[0] if emcee_major_version == "2" else _ret.coords
            lnposts = _ret[1] if emcee_major_version == "2" else _ret.log_prob
            iteration.set_postfix(
                lnpost_min=f"{np.min(lnposts):.5e}",
                lnpost_max=f"{np.max(lnposts):.5e}",
                lnpost_mean=f"{np.mean(lnposts):.5e}"
            )
        if burnin:
            self.reset_sampler()

    @property
    def df(self):
        """
        convert sampler.chain into pandas.DataFrame for convenience.
        """
        _df = DF(self.sampler.flatchain)
        _df = _df.rename(columns={i: key for i, key in enumerate(self.keys)})
        _df["lnpost"] = self.sampler.flatlnprobability
        return _df

    def save_chain(self, fname):
        self.df.to_pickle(fname)

    def plot_chain(self, kwargs_subplots={}, **kwargs):
        fig, ax = plt.subplots(self.ndim + 1, **kwargs_subplots)
        for i in range(self.ndim):
            ax[i].plot(self.sampler.chain[:, :, i].T,
                       **kwargs)  # [nwalkers,nsample,ndim]
            ax[i].set_ylabel(self.keys[i])
        ax[self.ndim].plot(self.sampler.lnprobability.T,
                           **kwargs)  # [nwalkers,nsample,ndim]
        ax[self.ndim].set_ylabel("lnpost")

    def plot_hist(self, **kwargs):
        self.df.hist(**kwargs)

    def map_estimater(self):
        """Return the maximum a posteriori (MAP) sample."""
        _i = self.df.lnpost.idxmax()
        return self.df.iloc[_i]
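# Usage sketch (the Gaussian log posterior and the starting positions are
# illustrative assumptions):
#
#     import numpy as np
#     def lnpost(p):
#         return -0.5 * np.sum(p ** 2)
#     keys = ['a', 'b']
#     p0 = 0.1 * np.random.randn(120, len(keys))
#     s = Sampler(lnpost, p0, keys)
#     s.sample(200, burnin=True)   # burn in; the chain is reset afterwards
#     s.sample(1000)
#     s.save_chain('chain.pkl')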
Example #17
0
class OCLBaseLPF(BaseLPF):
    def __init__(self, target: str, passbands: list, times: list = None, fluxes: list = None, errors: list = None,
                 pbids: list = None, covariates: list = None, nsamples: tuple = None, exptimes: tuple = None,
                 klims: tuple = (0.01, 0.75), nk: int = 512, nz: int = 512, cl_ctx=None, cl_queue=None, **kwargs):

        self.tm = QuadraticModelCL(klims=klims, nk=nk, nz=nz, cl_ctx=cl_ctx, cl_queue=cl_queue)
        self.cl_ctx = cl_ctx or self.tm.ctx
        self.cl_queue = cl_queue or self.tm.queue
        self.cl_lnl_chunks = kwargs.get('cl_lnl_chunks', 1)
        super().__init__(target, passbands, times, fluxes, errors, pbids, covariates, self.tm, nsamples, exptimes)

        src = """
           __kernel void lnl2d(const int nlc, __global const float *obs, __global const float *mod, __global const float *err, __global const int *lcids, __global float *lnl2d){
                  uint i_tm = get_global_id(1);    // time vector index
                  uint n_tm = get_global_size(1);  // time vector size
                  uint i_pv = get_global_id(0);    // parameter vector index
                  uint n_pv = get_global_size(0);  // parameter vector population size
                  uint gid = i_pv*n_tm + i_tm;     // global linear index
                  float e = err[i_pv*nlc + lcids[i_tm]];
                  lnl2d[gid] = -log(e) - 0.5f*log(2*M_PI_F) - 0.5f*pown((obs[i_tm]-mod[gid]) / e, 2);
            }

            __kernel void lnl1d(const uint npt, __global float *lnl2d, __global float *lnl1d){
                  uint i_pv = get_global_id(0);    // parameter vector index
                  uint n_pv = get_global_size(0);  // parameter vector population size
            
                int i;
                bool is_even;
                uint midpoint = npt;
                __global float *lnl = &lnl2d[i_pv*npt];
                
                while(midpoint > 1){
                    is_even = midpoint % 2 == 0;   
                    if (is_even == 0){
                        lnl[0] += lnl[midpoint-1];
                    }
                    midpoint /= 2;
                    
                    for(i=0; i<midpoint; i++){
                        lnl[i] = lnl[i] + lnl[midpoint+i];
                    }
                }
                lnl1d[i_pv] = lnl[0];
            }
            
            __kernel void lnl1d_chunked(const uint npt, __global float *lnl2d, __global float *lnl1d){
                uint ipv = get_global_id(0);    // parameter vector index
                uint npv = get_global_size(0);  // parameter vector population size
                uint ibl = get_global_id(1);    // block index
                uint nbl = get_global_size(1);  // number of blocks
                uint lnp = npt / nbl;
                  
                __global float *lnl = &lnl2d[ipv*npt + ibl*lnp];
              
                if(ibl == nbl-1){
                    lnp = npt - (ibl*lnp);
                }
            
                prefetch(lnl, lnp);
                bool is_even;
                uint midpoint = lnp;
                while(midpoint > 1){
                    is_even = midpoint % 2 == 0;   
                    if (is_even == 0){
                        lnl[0] += lnl[midpoint-1];
                    }
                    midpoint /= 2;
            
                    for(int i=0; i<midpoint; i++){
                        lnl[i] = lnl[i] + lnl[midpoint+i];
                    }
                }
                lnl1d[ipv*nbl + ibl] = lnl[0];
            }
        """
        self.prg_lnl = cl.Program(self.cl_ctx, src).build()
        self.lnlikelihood = self.lnlikelihood_ocl


    def _init_data(self, times, fluxes, pbids, covariates=None, errors=None, nsamples=None, exptimes=None):
        super()._init_data(times, fluxes, pbids, covariates, errors, nsamples, exptimes)
        self.nlc = int32(self.nlc)

        # Initialise the Python arrays
        # ----------------------------
        self.timea = self.timea.astype('f')
        self.ofluxa = self.ofluxa.astype('f')
        self.lnl2d = zeros([50, self.ofluxa.size], 'f')
        self.lnl1d = zeros([self.lnl2d.shape[0], self.cl_lnl_chunks], 'f')
        self.ferr = zeros([50, self.nlc], 'f')  # single precision, to match the OpenCL kernels
        self.lcids = self.lcids.astype('int32')
        self.pbids = self.pbids.astype('int32')
        if covariates is not None:
            self.cova = self.cova.astype('f')

        # Initialise OpenCL buffers
        # -------------------------
        mf = cl.mem_flags
        self._b_flux = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.ofluxa)
        self._b_err = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.ferr)
        self._b_lnl2d = cl.Buffer(self.cl_ctx, mf.WRITE_ONLY, self.lnl2d.nbytes)
        self._b_lnl1d = cl.Buffer(self.cl_ctx, mf.WRITE_ONLY, self.lnl1d.nbytes)
        self._b_lcids = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.lcids)
        if covariates is not None:
            self._b_covariates = cl.Buffer(self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.cova)

    def transit_model(self, pvp, copy=False):
        pvp = atleast_2d(pvp)
        pvp_t = zeros([pvp.shape[0], 8], "f")
        uv = zeros([pvp.shape[0], 2], "f")
        pvp_t[:, 0:1] = sqrt(pvp[:, self._pid_k2])  # Radius ratio
        pvp_t[:, 1:3] = pvp[:, 0:2]                 # Transit centre and orbital period
        pvp_t[:, 3] = a = as_from_rhop(pvp[:, 2], pvp[:, 1])
        pvp_t[:, 4] = i_from_ba(pvp[:, 3], a)
        a, b = sqrt(pvp[:, self._sl_ld][:, 0]), 2. * pvp[:, self._sl_ld][:, 1]
        uv[:, 0] = a * b
        uv[:, 1] = a * (1. - b)
        flux = self.tm.evaluate_t_pv2d(pvp_t, uv, copy=copy)
        return flux if copy else None

    def flux_model(self, pvp):
        return self.transit_model(pvp, copy=True).astype('d')

    def _lnl2d(self, pv):
        if self.lnl2d.shape[0] != pv.shape[0] or self.lnl1d.size != pv.shape[0]:
            self.err = zeros([pv.shape[0], self.nlc], 'f')
            self._b_err.release()
            self._b_err = cl.Buffer(self.cl_ctx, cl.mem_flags.WRITE_ONLY, self.err.nbytes)
            self.lnl2d = zeros([pv.shape[0], self.ofluxa.size], 'f')
            self._b_lnl2d.release()
            self._b_lnl2d = cl.Buffer(self.cl_ctx, cl.mem_flags.WRITE_ONLY, self.lnl2d.nbytes)
            self.lnl1d = zeros([pv.shape[0], self.cl_lnl_chunks], 'f')
            if self._b_lnl1d:
                self._b_lnl1d.release()
            self._b_lnl1d = cl.Buffer(self.cl_ctx, cl.mem_flags.WRITE_ONLY, self.lnl1d.nbytes)
        self.transit_model(pv)
        cl.enqueue_copy(self.cl_queue, self._b_err, (10 ** pv[:, self._sl_err]).astype('f'))
        self.prg_lnl.lnl2d(self.cl_queue, self.tm.f.shape, None, self.nlc, self._b_flux, self.tm._b_f,
                           self._b_err, self._b_lcids, self._b_lnl2d)


    def lnlikelihood_numba(self, pv):
        self._lnl2d(pv)
        cl.enqueue_copy(self.cl_queue, self.lnl2d, self._b_lnl2d)
        lnl = psum2d(self.lnl2d)
        return where(isfinite(lnl), lnl, -inf)


    def lnlikelihood_ocl(self, pv):
        self._lnl2d(pv)
        self.prg_lnl.lnl1d_chunked(self.cl_queue, [self.lnl2d.shape[0], self.cl_lnl_chunks], None,
                                   uint32(self.lnl2d.shape[1]), self._b_lnl2d, self._b_lnl1d)
        cl.enqueue_copy(self.cl_queue, self.lnl1d, self._b_lnl1d)
        lnl = self.lnl1d.astype('d').sum(1)
        return lnl

    def lnlikelihood_numpy(self, pv):
        self._lnl2d(pv)
        cl.enqueue_copy(self.cl_queue, self.lnl2d, self._b_lnl2d)
        lnl = self.lnl2d.astype('d').sum(1)
        return where(isfinite(lnl), lnl, -inf)

    def lnprior(self, pv):
        lnpriors = zeros(pv.shape[0])
        for i, p in enumerate(self.ps.priors):
            lnpriors += p.logpdf(pv[:, i])
        return lnpriors + self.additional_priors(pv)

    def lnposterior(self, pv):
        lnp = self.lnlikelihood(pv) + self.lnprior(pv)
        return where(isfinite(lnp), lnp, -inf)

    def optimize_global(self, niter=200, npop=50, population=None, label='Global optimisation', leave=False):
        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop, maximize=True, vectorize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label, leave=leave):
            pass

    def sample_mcmc(self, niter=500, thin=5, label='MCMC sampling', reset=False, leave=True):
        if not with_emcee:
            raise ImportError('Emcee not installed.')
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par, self.lnposterior, vectorize=True)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter, desc=label, leave=False):
            pass


    def remove_outliers(self, sigma=5):
        fmodel = self.flux_model(self.de.minimum_location)[0]
        times, fluxes, errors = [], [], []
        for i in range(len(self.times)):
            res = self.fluxes[i] - fmodel[self.lcslices[i]]
            mask = ~sigma_clip(res, sigma=sigma).mask
            times.append(self.times[i][mask])
            fluxes.append(self.fluxes[i][mask])
            if self.errors is not None:
                errors.append(self.errors[i][mask])
        self._init_data(times, fluxes, self.pbids, errors=(errors if self.errors is not None else None))
Example #18
0
import numpy as np
from emcee import EnsembleSampler

def lnprob(p):
    x, y = p
    lnp = -((1.0 - x)**2 + 100 * (y - x**2)**2)
    return lnp


ndim, nwalkers = 2, 40

p0 = np.array([np.random.rand(ndim) for i in range(nwalkers)])

sampler = EnsembleSampler(nwalkers, ndim, lnprob)
p0, prob, state = sampler.run_mcmc(p0, 10000)
#
sampler.reset()
#
p0, prob, state = sampler.run_mcmc(p0, 10000)
#
np.save("chain.npy", sampler.chain)
Example #19
0
class Sampler:
    """
    Wrapper of emcee.EnsembleSampler.
    """
    def __init__(self, lnpost, p0, nwalkers=120, blobs_dtype=float):
        """
        init
        """

        self.lnpost = lnpost
        # Note: blobs_dtype must be given as a list of (name, dtype) tuples
        # (not a tuple of tuples), otherwise emcee raises an error.
        self.sampler = EnsembleSampler(
            nwalkers, p0.shape[1], lnpost, blobs_dtype=blobs_dtype
        )
        self.p0 = p0
        self.p_last = p0
        self.ndim = p0.shape[1]

    def reset_sampler(self):
        self.sampler.reset()

    def sample(self, n_sample, burnin=False, use_pool=False):
        """
        execute mcmc for given iteration steps.
        """
        desc = "burnin" if burnin else "sample"

        with Pool() as pool:
            self.sampler.pool = pool if use_pool else None
            iteration = tqdm(self.sampler.sample(self.p_last,
                                                 iterations=n_sample),
                             total=n_sample,
                             desc=desc)
            for _ret in iteration:
                self.p_last = _ret.coords   # emcee 3 State; under emcee 2 this would be _ret[0]
                lnposts = _ret.log_prob     # under emcee 2 this would be _ret[1]
                iteration.set_postfix(lnpost_min=np.min(lnposts),
                                      lnpost_max=np.max(lnposts),
                                      lnpost_mean=np.mean(lnposts))
            if burnin:
                self.reset_sampler()

    def get_chain(self, **kwargs):
        return self.sampler.get_chain(**kwargs)

    def get_log_prob(self, **kwargs):
        return self.sampler.get_log_prob(**kwargs)

    def get_blobs(self, **kwargs):
        return self.sampler.get_blobs(**kwargs)

    def get_last_sample(self, **kwargs):
        return self.sampler.get_last_sample(**kwargs)

    def _save(self, fname_base):
        np.save(fname_base + "_chain.npy", self.get_chain())
        np.save(fname_base + "_lnprob.npy", self.get_log_prob())
        np.save(fname_base + "_lnlike.npy", self.get_blobs())

    def save(self, fname_base):
        '''
        Save MCMC results to "<fname_base>_chain/lnprob/lnlike.npy".
        If fname_base is like "your_directory/your_prefix", "your_directory" is created first if it does not exist.
        '''
        dirname = os.path.dirname(fname_base)
        if dirname == "":
            self._save(fname_base)
        else:
            if not os.path.isdir(dirname): os.mkdir(dirname)
            self._save(fname_base)

    def save_pickle(self, fname_base, overwrite=False):
        fname = fname_base + '_.gz'
        if os.path.exists(fname):
            if overwrite:
                warn(f"{fname} exists already. It will be overwritten.")
            else:
                raise RuntimeError(
                    f"{fname} exists already. If you want to overwrite it, set \"overwrite=True\"."
                )
        data = pickle.dumps(self)
        with gzip.open(fname, mode='wb') as fp:
            fp.write(data)
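# Usage sketch for the blobs mechanism (the log posterior below is an
# illustrative assumption; the second return value is stored as a blob):
#
#     import numpy as np
#     def lnpost(p):
#         lnlike = -0.5 * np.sum(p ** 2)
#         return lnlike, lnlike
#     p0 = 0.1 * np.random.randn(120, 3)
#     s = Sampler(lnpost, p0, blobs_dtype=[("lnlike", float)])
#     s.sample(200, burnin=True)
#     s.sample(1000)
#     lnlikes = s.get_blobs()["lnlike"]   # structured array, one field per blob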
Example #20
0
class LPFunction(object):
    """A basic log posterior function class.
    """

    def __init__(self, time, flux, nthreads=1):

        # Set up the transit model
        # ------------------------
        self.tm = MA(interpolate=True, klims=(0.08, 0.13), nthr=nthreads)
        self.nthr = nthreads

        # Initialise data
        # ---------------
        self.time = time.copy() if time is not None else array([])
        self.flux_o = flux.copy() if flux is not None else array([])
        self.npt = self.time.size

        # Set the optimiser and the MCMC sampler
        # --------------------------------------
        self.de = None
        self.sampler = None

        # Set up the parametrisation and priors
        # -------------------------------------
        psystem = [
            GParameter('tc', 'zero_epoch', 'd', NP(1.01, 0.02), (-inf, inf)),
            GParameter('pr', 'period', 'd', NP(2.50, 1e-7), (0, inf)),
            GParameter('rho', 'stellar_density', 'g/cm^3', UP(0.90, 2.50), (0.90, 2.5)),
            GParameter('b', 'impact_parameter', 'R_s', UP(0.00, 1.00), (0.00, 1.0)),
            GParameter('k2', 'area_ratio', 'A_s', UP(0.08 ** 2, 0.13 ** 2), (1e-8, inf))]

        pld = [
            PParameter('q1', 'q1_coefficient', '', UP(0, 1), bounds=(0, 1)),
            PParameter('q2', 'q2_coefficient', '', UP(0, 1), bounds=(0, 1))]

        perr = [LParameter('es', 'white_noise', '', UP(1e-6, 1e-2), bounds=(1e-6, 1e-2))]
        pbl = [LParameter('bl', 'baseline', '', NP(1.00, 0.001), bounds=(0.8, 1.2))]

        self.ps = ParameterSet()
        self.ps.add_global_block('system', psystem)
        self.ps.add_passband_block('ldc', 2, 1, pld)
        self.ps.add_lightcurve_block('error', 1, 1, perr)
        self.ps.add_lightcurve_block('baseline', 1, 1, pbl)
        self.ps.freeze()

    def compute_baseline(self, pv):
        """Constant baseline model"""
        return full_like(self.flux_o, pv[8])

    def compute_transit(self, pv):
        """Transit model"""
        _a = as_from_rhop(pv[2], pv[1])  # Scaled semi-major axis from stellar density and orbital period
        _i = mt.acos(pv[3] / _a)  # Inclination from impact parameter and semi-major axis
        _k = mt.sqrt(pv[4])  # Radius ratio from area ratio

        a, b = mt.sqrt(pv[5]), 2 * pv[6]
        _uv = array([a * b, a * (1. - b)])  # Quadratic limb darkening coefficients

        return self.tm.evaluate(self.time, _k, _uv, pv[0], pv[1], _a, _i)

    def compute_lc_model(self, pv):
        """Combined baseline and transit model"""
        return self.compute_baseline(pv) * self.compute_transit(pv)

    def lnprior(self, pv):
        """Log prior"""
        if any(pv < self.ps.lbounds) or any(pv > self.ps.ubounds):
            return -inf
        else:
            return self.ps.lnprior(pv)

    def lnlikelihood(self, pv):
        """Log likelihood"""
        flux_m = self.compute_lc_model(pv)
        return ll_normal_es(self.flux_o, flux_m, pv[7])

    def lnposterior(self, pv):
        """Log posterior"""
        lnprior = self.lnprior(pv)
        if isinf(lnprior):
            return lnprior
        else:
            return lnprior + self.lnlikelihood(pv)

    def create_pv_population(self, npop=50):
        return self.ps.sample_from_prior(npop)

    def optimize(self, niter=200, npop=50, population=None, label='Optimisation'):
        """Global optimisation using Differential evolution"""
        if self.de is None:
            self.de = DiffEvol(self.lnposterior, clip(self.ps.bounds, -1, 1), npop, maximize=True)
            if population is None:
                self.de._population[:, :] = self.create_pv_population(npop)
            else:
                self.de._population[:, :] = population
        for _ in tqdm(self.de(niter), total=niter, desc=label):
            pass

    def sample(self, niter=500, thin=5, label='MCMC sampling', reset=False):
        """MCMC sampling using emcee"""
        if self.sampler is None:
            self.sampler = EnsembleSampler(self.de.n_pop, self.de.n_par, self.lnposterior)
            pop0 = self.de.population
        else:
            pop0 = self.sampler.chain[:, -1, :].copy()
        if reset:
            self.sampler.reset()
        for _ in tqdm(self.sampler.sample(pop0, iterations=niter, thin=thin), total=niter, desc=label):
            pass
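# Usage sketch (the synthetic light curve is an illustrative assumption):
#
#     import numpy as np
#     time = np.linspace(0.9, 1.1, 1000)
#     flux = np.random.normal(1.0, 1e-3, time.size)
#     lpf = LPFunction(time, flux)
#     lpf.optimize(niter=200)          # differential evolution seeds the walkers
#     lpf.sample(niter=1000, thin=5)   # emcee refines the posterior
#     chains = lpf.sampler.chain       # (nwalkers, nsteps, ndim)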
Example #21
0
def predict_label_mcmc(theta0,
                       svrs,
                       flux_obs,
                       flux_ivar,
                       mask,
                       theta_lb=None,
                       theta_ub=None,
                       n_walkers=10,
                       n_burnin=200,
                       n_run=500,
                       threads=1,
                       return_chain=False,
                       mcmc_run_max_iter=5,
                       mcc=0.4,
                       prompt=None,
                       **kwargs):
    """ predict labels using emcee MCMC """
    # theta length
    n_dim = len(theta0)

    # default theta lower/upper bounds
    if theta_lb is None:
        theta_lb = np.ones_like(theta0) * -10.
    if theta_ub is None:
        theta_ub = np.ones_like(theta0) * 10.

    # instantiate an EnsembleSampler
    sampler = EnsembleSampler(n_walkers,
                              n_dim,
                              lnprob,
                              args=(svrs, flux_obs, flux_ivar, mask, theta_lb,
                                    theta_ub),
                              threads=threads)  # **kwargs?

    # burn in
    pos0 = [
        theta0 + np.random.uniform(-1, 1, size=(len(theta0), )) * 1.e-3
        for _ in range(n_walkers)
    ]
    pos, prob, rstate = sampler.run_mcmc(pos0, n_burnin)

    # run mcmc
    for i_run in range(mcmc_run_max_iter):
        print("--------------------------------------------------------------")
        print(prompt, " i_run : ", i_run)
        print(prompt, " Current pos : \n", pos)

        # new position
        pos_new, state, pos_best = check_chains(sampler,
                                                pos,
                                                theta_lb,
                                                theta_ub,
                                                mode_list=['bounds'])
        print(prompt, " New pos : ", pos_new)
        print(prompt, " Best pos : ", pos_best)

        if np.any(np.logical_not(state)):
            print(prompt, " Chain states : ", state)
            print(prompt, " RESET chain : ",
                  np.arange(0,
                            len(state) + 1)[state])

        # maximum correlation coefficients
        mcc_qtl, mcc_mat = sampler_mcc(sampler)
        # state_mcc = True --> not any out of threshold --> good chain
        state_mcc = ~np.any(np.abs(mcc_qtl) >= mcc)

        print(prompt, " *** MCC quantiles *** : ", mcc_qtl)
        # print(prompt, " MCC_MAT : -----------------------------------------")
        # for i in range(mcc_mat.shape[2]):
        #     print(prompt, " MCC_MAT[:,:,%s]: " % i, mcc_mat[:, :, i])

        # if chains are good, break and do statistics
        if state_mcc and i_run > 0:
            break

        # else continue running
        sampler.reset()
        pos, prob, rstate = sampler.run_mcmc(pos_new, n_run)

    print(prompt, ' state_mcc : ', state_mcc)

    # estimate percentiles
    theta_est_mcmc = np.nanpercentile(sampler.flatchain, [15., 50., 85.],
                                      axis=0)

    # format of theta_est_mcmc:
    # array([theta_p15,
    #        theta_p50,
    #        theta_p85])
    # e.g.:
    # array([[ 3.21908185,  5.66655696,  8.99618546],
    #        [ 3.22411158,  5.68827311,  9.08791289],
    #        [ 3.22909087,  5.71157073,  9.17812294]])

    # sampler is not returned, for saving memory
    if return_chain:
        result = {
            'theta': theta_est_mcmc,
            'state_mcc': state_mcc,
            'mcc_qtl': mcc_qtl,
            'mcc_mat': mcc_mat,
            'i_run': i_run,
            'flatchain': sampler.flatchain
        }
    else:
        result = {
            'theta': theta_est_mcmc,
            'state_mcc': state_mcc,
            'mcc_qtl': mcc_qtl,
            'mcc_mat': mcc_mat,
            'i_run': i_run
        }

    return result
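# Calling sketch (svrs, flux_obs, flux_ivar, and mask are survey-specific
# inputs assumed to exist; the keyword values are placeholders):
#
#     result = predict_label_mcmc(theta0, svrs, flux_obs, flux_ivar, mask,
#                                 n_walkers=20, n_burnin=200, n_run=500)
#     theta_p15, theta_p50, theta_p85 = result['theta']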
Example #22
0
    def run_mcmc(self,
                 obs,
                 obs_err=0,
                 obs_tag=None,
                 p0=None,
                 n_burnin=(100, 100),
                 n_step=1000,
                 lnlike=None,
                 lnprior=None,
                 lnlike_kwargs={},
                 lnprior_kwargs={},
                 pos_eps=.1,
                 full=True,
                 shrink="max"):
        """ run MCMC for (obs, obs_err, obs_tag)

        Parameters
        ----------
        obs:
            observable
        obs_err:
            error of observation
        obs_tag:
            array of the same size as obs; 1 for good data, 0 for bad
        p0:
            initial points
        n_burnin:
            int/sequence, do 1 or multiple times of burn in process
        n_step:
            running step
        lnlike:
            lnlike(x, *args) is the log likelihood function
        lnprior:
            lnpost(x) is the log prior
        pos_eps:
            random magnitude for starting position
        full:
            if True, return sampler, pos, prob, state
            otherwise, return sampler
        shrink:
            default "max": shrink to the maximum likelihood position

        Returns
        -------
        EnsembleSampler instance

        """
        if obs_tag is None:
            # default obs_tag
            obs_tag = np.isfinite(obs)
        else:
            obs_tag = np.isfinite(obs) & obs_tag
        # cope with bad obs
        if np.sum(obs_tag) == 0:
            return None

        if p0 is None:
            # do best match for starting position
            p0 = self.best_match(obs, obs_err)
            print("@Regli.best_match: ", p0)

        if lnlike is None:
            # default gaussian likelihood function
            lnlike = default_lnlike
            print("@Regli: using the default gaussian *lnlike* function...")

        if lnprior is None:
            # no prior is specified
            lnpost = lnlike
            print("@Regli: No prior is adopted ...")
        else:
            # use user-defined prior
            print("@Regli: using user-defined *lnprior* function...")

            def lnpost(*_args, **_kwargs):
                lnpost_value = float(lnprior(_args[0], **_kwargs["lnprior_kwargs"])) + \
                               lnlike(*_args, **_kwargs["lnlike_kwargs"])
                if np.isfinite(lnpost_value):
                    return lnpost_value
                else:
                    return -np.inf

        # set parameters for sampler
        ndim = self.ndim
        nwalkers = 2 * ndim

        # initiate sampler
        sampler = EnsembleSampler(nwalkers,
                                  ndim,
                                  lnpostfn=lnpost,
                                  args=(self, obs, obs_err, obs_tag),
                                  kwargs=dict(lnprior_kwargs=lnprior_kwargs,
                                              lnlike_kwargs=lnlike_kwargs))

        # generate random starting positions
        pos0 = rand_pos(p0, nwalkers=nwalkers, eps=pos_eps)

        if isinstance(n_burnin, collections.abc.Iterable):
            # [o] multiple burn-ins
            for n_burnin_ in n_burnin:
                # run mcmc
                print("@Regli: running burn-in [{}]...".format(n_burnin_))
                pos, prob, state = sampler.run_mcmc(pos0, n_burnin_)

                # shrink to a new position
                if shrink == "max":
                    p1 = sampler.flatchain[np.argmax(
                        sampler.flatlnprobability)]
                else:
                    p1 = np.median(pos, axis=0)
                pos_std = np.std(sampler.flatchain, axis=0) * 0.5

                # reset sampler
                sampler.reset()

                # randomly generate new start position
                pos0 = rand_pos(p1, nwalkers=nwalkers, eps=pos_std)

        else:
            # [o] single burn-in
            # run mcmc
            print("@Regli: running burn-in [{}]...".format(n_burnin))
            pos, prob, state = sampler.run_mcmc(pos0, n_burnin)

            # shrink to a new position
            if shrink == "max":
                p1 = sampler.flatchain[np.argmax(sampler.flatlnprobability)]
            else:
                p1 = np.median(pos, axis=0)
            pos_std = np.std(sampler.flatchain, axis=0) * 0.5

            # reset sampler
            sampler.reset()

            # randomly generate new start position
            pos0 = rand_pos(p1, nwalkers=nwalkers, eps=pos_std)

        # run mcmc
        print("@Regli: running chains [{}]...".format(n_step))
        pos, prob, state = sampler.run_mcmc(pos0, n_step)

        if full:
            # return full result
            return sampler, pos, prob, state
        else:
            # return sampler only
            return sampler
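# rand_pos() is called above but not defined in this excerpt. A minimal
# implementation consistent with the call sites (an assumption, not the
# original helper): scatter nwalkers copies of p0 with amplitude eps, where
# eps may be a scalar or a per-dimension array.

import numpy as np

def rand_pos(p0, nwalkers=10, eps=.1):
    p0 = np.atleast_1d(np.asarray(p0, dtype=float))
    return p0 + eps * np.random.uniform(-1, 1, size=(nwalkers, p0.size))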
Example #23
0
p0 = np.random.rand(nwalkers, ndim)
p0 = result.x * (1. + 0.05 * (p0 - 0.5))

# sampler = EnsembleSampler(nwalkers, ndim, lnprob4D)

pool = MPIPool()  # loadbalance=True

if not pool.is_master():
    pool.wait()
    sys.exit(0)

sampler = EnsembleSampler(nwalkers, ndim, lnprob4D, pool=pool)

pos, lnprob, rand_state = sampler.run_mcmc(p0, nburn)

sampler.reset()  # Reset the chain to remove the burn-in samples; keep walker positions.

pos, lnprob, rand_stateR = sampler.run_mcmc(pos, nsteps, rstate0=rand_state)

if pool.is_master():
    meanacceptance = np.mean(sampler.acceptance_fraction)
    autocorrelationtimes = sampler.get_autocorr_time()

    # Print out the mean acceptance fraction. acceptance_fraction has an entry
    # for each walker so, in this case, it is a 50-dimensional vector.
    print("Mean acceptance fraction: " + str(meanacceptance))

    # Estimate the integrated autocorrelation time for the time series in each parameter.
    print("Autocorrelation time: " + str(autocorrelationtimes))

pool.close()  # shut down the MPI worker pool (assumed not closed elsewhere)