Example #1
    def setup_sampler(self):
        """ Either initialize the sampelr or read in the resume file """
        import ptemcee

        if os.path.isfile(self.resume_file) and self.resume is True:
            logger.info("Resume data {} found".format(self.resume_file))
            with open(self.resume_file, "rb") as file:
                data = dill.load(file)

            # Extract the check-point data
            self.sampler = data["sampler"]
            self.iteration = data["iteration"]
            self.chain_array = data["chain_array"]
            self.log_likelihood_array = data["log_likelihood_array"]
            self.pos0 = data["pos0"]
            self.beta_list = data["beta_list"]
            self.sampler._betas = np.array(self.beta_list[-1])
            self.tau_list = data["tau_list"]
            self.tau_list_n = data["tau_list_n"]
            self.time_per_check = data["time_per_check"]

            # Initialize the pool
            self.sampler.pool = self.pool
            self.sampler.threads = self.threads

            logger.info("Resuming from previous run with time={}".format(
                self.iteration))

        else:
            # Initialize the PTSampler
            if self.threads == 1:
                self.sampler = ptemcee.Sampler(dim=self.ndim,
                                               logl=self.log_likelihood,
                                               logp=self.log_prior,
                                               **self.sampler_init_kwargs)
            else:
                self.sampler = ptemcee.Sampler(dim=self.ndim,
                                               logl=do_nothing_function,
                                               logp=do_nothing_function,
                                               pool=self.pool,
                                               threads=self.threads,
                                               **self.sampler_init_kwargs)

                self.sampler._likeprior = LikePriorEvaluator(
                    self.search_parameter_keys, use_ratio=self.use_ratio)

            # Initialize storing results
            self.iteration = 0
            self.chain_array = self.get_zero_chain_array()
            self.log_likelihood_array = self.get_zero_log_likelihood_array()
            self.beta_list = []
            self.tau_list = []
            self.tau_list_n = []
            self.time_per_check = []
            self.pos0 = self.get_pos0()

        return self.sampler
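
The resume branch above reads a dill checkpoint with a fixed set of keys. Below is a minimal sketch of the matching write side, assuming the same attribute names as the class above; the method name write_checkpoint is hypothetical, and the real project may organize checkpointing differently:

    def write_checkpoint(self):
        """ Hypothetical counterpart to the resume branch of setup_sampler() """
        import dill

        # Persist exactly the keys that setup_sampler() reads back in
        data = dict(iteration=self.iteration,
                    sampler=self.sampler,
                    chain_array=self.chain_array,
                    log_likelihood_array=self.log_likelihood_array,
                    pos0=self.pos0,
                    beta_list=self.beta_list,
                    tau_list=self.tau_list,
                    tau_list_n=self.tau_list_n,
                    time_per_check=self.time_per_check)

        # Pools cannot be pickled: detach before dumping, reattach after
        self.sampler.pool = None
        with open(self.resume_file, "wb") as file:
            dill.dump(data, file)
        self.sampler.pool = self.pool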
Example #2
 def _initialise_sampler(self):
     import ptemcee
     self._sampler = ptemcee.Sampler(dim=self.ndim,
                                     logl=self.log_likelihood,
                                     logp=self.log_prior,
                                     **self.sampler_init_kwargs)
     self._init_chain_file()
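
For context, here is a self-contained sketch of the ptemcee.Sampler API these wrappers build on, assuming the released 1.x signature (positional nwalkers and dim); the Gaussian likelihood and flat prior are illustrative only:

import numpy as np
import ptemcee

ndim, nwalkers, ntemps = 2, 16, 4

def log_likelihood(x):
    return -0.5 * np.sum(x ** 2)  # standard-normal toy likelihood

def log_prior(x):
    # flat prior on [-10, 10]^ndim
    return 0.0 if np.all(np.abs(x) < 10.0) else -np.inf

sampler = ptemcee.Sampler(nwalkers, ndim, log_likelihood, log_prior,
                          ntemps=ntemps)
p0 = np.random.uniform(-1, 1, (ntemps, nwalkers, ndim))
sampler.run_mcmc(p0, 500)

# chain shape: (ntemps, nwalkers, nsteps, ndim); index 0 is the cold chain
posterior_samples = sampler.chain[0].reshape(-1, ndim)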
Example #3
    def run_sampler(self):
        import ptemcee
        tqdm = get_progress_bar()
        sampler = ptemcee.Sampler(dim=self.ndim, logl=self.log_likelihood,
                                  logp=self.log_prior, **self.sampler_init_kwargs)
        self.pos0 = [[self.get_random_draw_from_prior()
                      for _ in range(self.nwalkers)]
                     for _ in range(self.kwargs['ntemps'])]

        for _ in tqdm(
                sampler.sample(self.pos0, **self.sampler_function_kwargs),
                total=self.nsteps):
            pass

        self.calculate_autocorrelation(sampler.chain.reshape((-1, self.ndim)))
        self.result.sampler_output = np.nan
        self.print_nburn_logging_info()
        self.result.nburn = self.nburn
        if self.result.nburn > self.nsteps:
            logger.warning('Chain not burned in, no samples generated.')
        self.result.samples = sampler.chain[0, :, self.nburn:, :].reshape(
            (-1, self.ndim))
        self.result.betas = sampler.betas
        self.result.log_evidence, self.result.log_evidence_err =\
            sampler.log_evidence_estimate(
                sampler.loglikelihood, self.nburn / self.nsteps)
        self.result.walkers = sampler.chain[0, :, :, :]

        return self.result
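
A short post-processing sketch for a run like the one above. Names follow the example (sampler from the finished run; ndim and nsteps stand in for self.ndim and self.nsteps; the nburn value is illustrative):

# sampler.chain has shape (ntemps, nwalkers, nsteps, ndim)
nburn = 100                                  # illustrative burn-in length
posterior = sampler.chain[0, :, nburn:, :]   # cold (beta = 1) chain only
flat_samples = posterior.reshape(-1, ndim)

# thermodynamic-integration evidence over the post-burn-in fraction,
# mirroring the call in the example above
log_z, log_z_err = sampler.log_evidence_estimate(
    sampler.loglikelihood, nburn / nsteps)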
Example #4
    def run_sampler(self):
        import ptemcee
        tqdm = get_progress_bar()
        sampler = ptemcee.Sampler(dim=self.ndim,
                                  logl=self.log_likelihood,
                                  logp=self.log_prior,
                                  **self.sampler_init_kwargs)
        self.pos0 = [[
            self.get_random_draw_from_prior() for _ in range(self.nwalkers)
        ] for _ in range(self.kwargs['ntemps'])]

        log_likelihood_evaluations = []
        log_prior_evaluations = []
        for pos, logpost, loglike in tqdm(sampler.sample(
                self.pos0, **self.sampler_function_kwargs),
                                          total=self.nsteps):
            log_likelihood_evaluations.append(loglike)
            log_prior_evaluations.append(logpost - loglike)

        self.calculate_autocorrelation(sampler.chain.reshape((-1, self.ndim)))
        self.result.sampler_output = np.nan
        self.print_nburn_logging_info()
        self.result.nburn = self.nburn
        if self.result.nburn > self.nsteps:
            raise SamplerError(
                "The run has finished, but the chain is not burned in: "
                "`nburn < nsteps`. Try increasing the number of steps.")
        self.result.samples = sampler.chain[0, :, self.nburn:, :].reshape(
            (-1, self.ndim))
        self.result.log_likelihood_evaluations = np.array(
            log_likelihood_evaluations)[self.nburn:, 0, :].reshape((-1))
        self.result.log_prior_evaluations = np.array(log_prior_evaluations)[
            self.nburn:, 0, :].reshape((-1))
        self.result.betas = sampler.betas
        self.result.log_evidence, self.result.log_evidence_err =\
            sampler.log_evidence_estimate(
                sampler.loglikelihood, self.nburn / self.nsteps)
        self.result.walkers = sampler.chain[0, :, :, :]

        return self.result
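
A note on the arrays collected in the loop above, assuming ptemcee's sample() yields one (pos, logpost, loglike) triple per step:

# pos     : (ntemps, nwalkers, ndim)  walker positions
# logpost : (ntemps, nwalkers)        tempered log-posterior
# loglike : (ntemps, nwalkers)        log-likelihood
# Stacking loglike over steps gives (nsteps, ntemps, nwalkers), so the
# slice [self.nburn:, 0, :] keeps the post-burn-in cold chain, where
# beta = 1 and logpost - loglike is exactly the log-prior.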
Example #5
def run_pt_emcee(log_like,
                 log_prior,
                 n_burn,
                 n_steps,
                 n_temps=None,
                 n_walkers=None,
                 p_dict=None,
                 p0=None,
                 columns=None,
                 loglargs=(),
                 logpargs=(),
                 threads=None,
                 thin=1,
                 return_lnZ=False,
                 return_sampler=False,
                 return_pos=False):
    """
    Run parallel-tempered MCMC using ptemcee.

    Parameters
    ----------
    log_like : function
        The function that computes the log likelihood.  Must be of
        the form log_like(p, *llargs), where p is a NumPy array of
        parameters that are sampled by the MCMC sampler.
    log_prior : function
        The function that computes the log prior.  Must be of
        the form log_prior(p, *lpargs), where p is a NumPy array of
        parameters that are sampled by the MCMC sampler.
    n_burn : int
        Number of burn steps
    n_steps : int
        Number of MCMC samples to take
    n_temps : int
        The number of temperatures to use in PT sampling.
    n_walkers : int
        Number of walkers
    p_dict : collections.OrderedDict
        Each entry is a tuple with the function used to generate
        starting points for the parameter and the arguments for
        the function.  The starting point function must have the
        call signature f(*args_for_function, (n_temps, n_walkers)),
        where the final argument is the shape of the output array.
        Ignored if p0 is not None.
    p0 : array
        n_walkers by n_dim array of initial starting values.
        p0[k,i,j] is the starting point for walk i along variable j
        for temperature k.  If provided, p_dict is ignored.
    columns : list of strings
        Name of parameters.  These will be the column headings in the
        returned DataFrame.  If None, either inferred from p_dict or
        assigned sequential integers.
    loglargs : tuple
        Extra positional arguments passed to log_like.
    logpargs : tuple
        Extra positional arguments passed to log_prior.
    threads : int
        Number of cores to use in calculation
    thin : int
        The number of iterations to perform between saving the
        state to the internal chain.
    return_lnZ : bool, default False
        If True, additionally return lnZ and dlnZ.
    return_sampler : bool, default False
        If True, additionally return sampler.
    return_pos : bool, default False
        If True, additionally return position of the sampler.

    Returns
    -------
    df : pandas.DataFrame
        First columns give flattened MCMC chains, with columns
        named with the variable being sampled as a string.
        Other columns are:
          'chain':    ID of chain
          'beta':     Inverse temperature
          'beta_ind': Index of beta in list of betas
          'lnlike':   Log likelihood
          'lnprob':   Log posterior probability (with beta multiplying
                      log likelihood)
    lnZ : float, optional
        ln Z(1), which is equal to the evidence of the
        parameter estimation problem.
    dlnZ : float, optional
        The estimated error in the lnZ calculation.
    sampler : ptemcee.Sampler instance, optional
        The sampler instance.
    pos : ndarray, shape (ntemps, nwalkers, ndim), optional
        Last position of the walkers.
    """

    if p0 is None and p_dict is None:
        raise RuntimeError('Must supply either p0 or p_dict.')

    # Infer n_dim and n_walkers (and check inputs)
    if p0 is None:
        if n_walkers is None:
            raise RuntimeError('n_walkers must be specified if p0 is None')

        if type(p_dict) is not collections.OrderedDict:
            raise RuntimeError('p_dict must be collections.OrderedDict.')

        n_dim = len(p_dict)
    else:
        n_temps, n_walkers, n_dim = p0.shape
        if p_dict is not None:
            warnings.warn('p_dict is being ignored.', RuntimeWarning)

    # Infer columns
    if columns is None:
        if p_dict is not None:
            columns = list(p_dict.keys())
        else:
            columns = list(range(n_dim))
    elif len(columns) != n_dim:
        raise RuntimeError('len(columns) must equal number of parameters.')

    # Check for invalid column names
    invalid_column_names = ['lnprob', 'chain', 'lnlike', 'beta', 'beta_ind']
    if np.any([x in columns for x in invalid_column_names]):
        raise RuntimeError('You cannot name columns with any of these: ' +
                           '  '.join(invalid_column_names))

    # Build starting points of walkers
    if p0 is None:
        p0 = np.empty((n_temps, n_walkers, n_dim))
        for i, key in enumerate(p_dict):
            p0[:, :, i] = p_dict[key][0](*(p_dict[key][1] +
                                           ((n_temps, n_walkers), )))

    # Set up the PTSampler instance
    if threads is not None:
        sampler = ptemcee.Sampler(n_walkers,
                                  n_dim,
                                  log_like,
                                  log_prior,
                                  ntemps=n_temps,
                                  loglargs=loglargs,
                                  logpargs=logpargs,
                                  threads=threads)
    else:
        sampler = ptemcee.Sampler(n_walkers,
                                  n_dim,
                                  log_like,
                                  log_prior,
                                  ntemps=n_temps,
                                  loglargs=loglargs,
                                  logpargs=logpargs)

    # Do burn-in
    if n_burn > 0:
        pos, _, _ = sampler.run_mcmc(p0, iterations=n_burn, storechain=False)
    else:
        pos = p0

    # Sample again, starting from end burn-in state
    pos, _, _ = sampler.run_mcmc(pos, iterations=n_steps, thin=thin)

    # Compute thermodynamic integral
    lnZ, dlnZ = sampler.log_evidence_estimate(fburnin=0)

    # Make DataFrame for results
    df = sampler_to_dataframe(sampler, columns=columns)

    # Set up return
    return_vals = (df, lnZ, dlnZ, sampler, pos)
    return_bool = (True, return_lnZ, return_lnZ, return_sampler, return_pos)
    ret = tuple([rv for rv, rb in zip(return_vals, return_bool) if rb])
    if len(ret) == 1:
        return ret[0]
    return ret
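
A hypothetical usage sketch for run_pt_emcee(): each p_dict entry pairs a draw function with its argument tuple, and the shape (n_temps, n_walkers) is appended as the final call argument. The toy likelihood, prior, and bounds are illustrative:

import collections
import numpy as np

def log_like(p):
    return -0.5 * np.sum((p - 1.0) ** 2)

def log_prior(p):
    return 0.0 if np.all(np.abs(p) < 10.0) else -np.inf

p_dict = collections.OrderedDict([
    # called as np.random.uniform(-1.0, 1.0, (n_temps, n_walkers))
    ('mu', (np.random.uniform, (-1.0, 1.0))),
    ('sigma', (np.random.uniform, (0.1, 2.0))),
])

df = run_pt_emcee(log_like, log_prior, n_burn=100, n_steps=500,
                  n_temps=4, n_walkers=16, p_dict=p_dict)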
Example #6
    def run_sampler(self,
                    total_orbits,
                    burn_steps=0,
                    thin=1,
                    examine_chains=False):
        """
        Runs PT MCMC sampler. Results are stored in ``self.chain`` and ``self.lnlikes``.
        Results also added to ``orbitize.results.Results`` object (``self.results``)

        .. Note:: Can be run multiple times if you want to pause and inspect things.
            Each call will continue from the end state of the last execution.

        Args:
            total_orbits (int): total number of accepted possible
                orbits that are desired. This equals
                ``num_steps_per_walker`` x ``num_walkers``
            burn_steps (int): optional parameter to tell the sampler
                to discard a certain number of steps at the beginning
            thin (int): factor to thin the steps of each walker
                by to remove correlations in the walker steps
            examine_chains (boolean): if True, display plots of the walkers
                at each step by running `examine_chains` after `total_orbits`
                have been sampled.

        Returns:
            ``emcee.sampler`` object: the sampler used to run the MCMC
        """

        if self.use_pt:
            sampler = ptemcee.Sampler(self.num_walkers,
                                      self.num_params,
                                      self._logl,
                                      orbitize.priors.all_lnpriors,
                                      ntemps=self.num_temps,
                                      threads=self.num_threads,
                                      logpargs=[
                                          self.priors,
                                      ])
        else:
            sampler = emcee.EnsembleSampler(self.num_walkers,
                                            self.num_params,
                                            self._logl,
                                            threads=self.num_threads,
                                            kwargs={'include_logp': True})

        # we're using args because emcee < 3.0 has three return values whereas emcee > 3.0 has
        # four. We can explicitly declare 4 variables instead of args in the future.
        for args in sampler.sample(self.curr_pos,
                                   iterations=burn_steps,
                                   thin=thin):
            pass

        sampler.reset()
        try:
            self.curr_pos = args[0]
        except UnboundLocalError:  # 0 step burn-in (pos is not defined)
            pass
        print('Burn in complete')

        nsteps = int(np.ceil(total_orbits / self.num_walkers))

        assert (nsteps > 0), 'Total_orbits must be greater than num_walkers.'

        i = 0
        # we're using args because emcee < 3.0 has three return values whereas emcee > 3.0 has
        # four. We can explicitly declare 4 variables instead of args in the future.
        for args in sampler.sample(self.curr_pos, iterations=nsteps,
                                   thin=thin):
            i += 1
            # print progress statement
            if i % 5 == 0:
                print(str(i) + '/' + str(nsteps) + ' steps completed',
                      end='\r')
        print('')

        self.curr_pos = args[0]  # note that args[0] is pos output

        # TODO: Need something here to pick out temperatures, just using lowest one for now
        self.chain = sampler.chain

        if self.use_pt:
            self.post = sampler.flatchain[0, :, :]
            # should also be picking out the lowest temperature logps
            self.lnlikes = sampler.loglikelihood[0, :, :].flatten()
            self.lnlikes_alltemps = sampler.loglikelihood
        else:
            self.post = sampler.flatchain
            self.lnlikes = sampler.flatlnprobability

            # convert posterior probability (returned by sampler objects) to likelihood (required by orbitize.results.Results)
            for i, orb in enumerate(self.post):
                self.lnlikes[i] -= orbitize.priors.all_lnpriors(
                    orb, self.priors)

        # include fixed parameters in posterior
        self.post = self._fill_in_fixed_params(self.post)

        self.results.add_samples(self.post,
                                 self.lnlikes,
                                 labels=self.system.labels)

        print('Run complete')

        if examine_chains:
            self.examine_chains()

        return sampler
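
A brief usage sketch of the resume behaviour documented above (my_sampler is a hypothetical instance of this class):

# first call: burn in, then sample
my_sampler.run_sampler(total_orbits=10000, burn_steps=200)
# inspect self.chain / self.results, then continue sampling; each call
# resumes from self.curr_pos, so no further burn-in is needed
my_sampler.run_sampler(total_orbits=10000, burn_steps=0)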
Example #7
    def run_mcmc(self,
                 nsteps,
                 nburnsteps=None,
                 nwalkers=None,
                 status=None,
                 ntemps=1):
        """
        Run MCMC model calibration.  If the chain already exists, continue from
        the last point, otherwise burn-in and start the chain.

        """
        with self.open('a') as f:
            try:
                dset = f['chain']
            except KeyError:
                burn = True
                if nburnsteps is None or nwalkers is None:
                    print(
                        'must specify nburnsteps and nwalkers to start chain')
                    return
                dset = f.create_dataset('chain',
                                        dtype='f8',
                                        shape=(nwalkers, 0, self.ndim),
                                        chunks=(nwalkers, 1, self.ndim),
                                        maxshape=(nwalkers, None, self.ndim),
                                        compression='lzf')
            else:
                burn = False
                nwalkers = dset.shape[0]

            # choose the number of temperatures for the PTSampler
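            # usePTSampler is assumed to be a module-level flag set
            # elsewhere; it is not defined in this snippet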
            if usePTSampler:
                print("Using PTSampler")
                print("ntemps = " + str(ntemps))
                ncpu = cpu_count()
                print("{0} CPUs".format(ncpu))
                Tmax = np.inf
                with Pool() as pool:
                    sampler = ptemcee.Sampler(nwalkers,
                                              self.ndim,
                                              self.log_likelihood,
                                              self.log_prior,
                                              ntemps,
                                              Tmax,
                                              pool=pool)
                    print("Running burn-in phase")
                    nburn0 = nburnsteps
                    pos0 = np.random.uniform(self.min, self.max,
                                             (ntemps, nwalkers, self.ndim))
                    start = time.time()
                    sampler.run_mcmc(pos0, nburn0, adapt=True)
                    end = time.time()
                    print("... finished in " + str(end - start) + " sec")
                    print("sampler.chain.shape " + str(sampler.chain.shape))
                    print("betas = " + str(sampler.betas))
                    #get the last step of the chain
                    pos0 = sampler.chain[:, :, -1, :]
                    print("pos0.shape " + str(pos0.shape))
                    sampler.reset()
                    print("Running MCMC chains")
                    niters = 10
                    for it in range(niters):  # 'it' avoids shadowing builtin iter()
                        print("betas = " + str(sampler.betas))
                        print("iteration " + str(it) + " ...")
                        start = time.time()
                        sampler.run_mcmc(pos0, nsteps // 10)
                        end = time.time()
                        print("... finished in " + str(end - start) + " sec")

                print("sampler.chain.shape " + str(sampler.chain.shape))
                print('writing chain to file')
                dset.resize(dset.shape[1] + nsteps, 1)
                #save only the zero temperature chain
                dset[:, -nsteps:, :] = sampler.chain[0, :, :, :]

                #save the thermodynamic log evidence
                #logZ, dlogZ = sampler.thermodynamic_integration_log_evidence()
                logZ, dlogZ = sampler.log_evidence_estimate()
                print("logZ = " + str(logZ) + " +/- " + str(dlogZ))
                with open('mcmc/chain-idf-' + str(idf) + '-info.dat',
                          'w') as f:
                    f.write('logZ ' + str(logZ) + '\n')
                    f.write('dlogZ ' + str(dlogZ))

            else:
                sampler = LoggingEnsembleSampler(nwalkers,
                                                 self.ndim,
                                                 self.log_posterior,
                                                 pool=self)
                if burn:
                    print('no existing chain found, starting initial burn-in')
                    # Run first half of burn-in starting from random positions.
                    nburn0 = nburnsteps // 2
                    sampler.run_mcmc(self.random_pos(nwalkers),
                                     nburn0,
                                     status=status)
                    print('resampling walker positions')
                    # Reposition walkers to the most likely points in the chain,
                    # then run the second half of burn-in.  This significantly
                    # accelerates burn-in and helps prevent stuck walkers.
                    X0 = sampler.flatchain[np.unique(
                        sampler.flatlnprobability,
                        return_index=True)[1][-nwalkers:]]
                    sampler.reset()
                    X0 = sampler.run_mcmc(X0,
                                          nburnsteps - nburn0,
                                          status=status,
                                          storechain=False)[0]
                    sampler.reset()
                    print('burn-in complete, starting production')
                else:
                    print('restarting from last point of existing chain')
                    X0 = dset[:, -1, :]
                sampler.run_mcmc(X0, nsteps, status=status)
                print('writing chain to file')
                dset.resize(dset.shape[1] + nsteps, 1)
                dset[:, -nsteps:, :] = sampler.chain
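
The PT branch above keeps the multiprocessing Pool open for the lifetime of the sampler. Here is a minimal self-contained sketch of that pattern (toy likelihood and prior; both are module-level functions so the pool workers can pickle them):

from multiprocessing import Pool

import numpy as np
import ptemcee

def logl(x):
    return -0.5 * np.sum(x ** 2)

def logp(x):
    return 0.0

if __name__ == '__main__':
    ntemps, nwalkers, ndim = 4, 16, 3
    with Pool() as pool:
        sampler = ptemcee.Sampler(nwalkers, ndim, logl, logp,
                                  ntemps=ntemps, pool=pool)
        p0 = np.random.uniform(-1, 1, (ntemps, nwalkers, ndim))
        sampler.run_mcmc(p0, 200, adapt=True)  # adapt the ladder, as above
    # the pool is closed here; only read results (sampler.chain, betas, ...)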
Example #8
    def __init__(self,
                 model,
                 nwalkers,
                 ntemps=None,
                 Tmax=None,
                 betas=None,
                 adaptive=False,
                 adaptation_lag=None,
                 adaptation_time=None,
                 scale_factor=None,
                 loglikelihood_function=None,
                 checkpoint_interval=None,
                 checkpoint_signal=None,
                 nprocesses=1,
                 use_mpi=False):

        self.model = model
        ndim = len(model.variable_params)
        # create temperature ladder if needed
        if ntemps is None and Tmax is None and betas is None:
            raise ValueError("must provide either ntemps/Tmax or betas")
        if betas is None:
            betas = ptemcee.make_ladder(ndim, ntemps=ntemps, Tmax=Tmax)
        # construct the keyword arguments to pass; if a kwarg is None, we
        # won't pass it, resulting in ptemcee's defaults being used
        kwargs = {}
        kwargs['adaptive'] = adaptive
        kwargs['betas'] = betas
        if adaptation_lag is not None:
            kwargs['adaptation_lag'] = adaptation_lag
        if adaptation_time is not None:
            kwargs['adaptation_time'] = adaptation_time
        if scale_factor is not None:
            kwargs['scale_factor'] = scale_factor
        # create a wrapper for calling the model
        if loglikelihood_function is None:
            loglikelihood_function = 'loglikelihood'
        # frustratingly, ptemcee does not support blob data, so we have to
        # turn it off
        model_call = models.CallModel(model,
                                      loglikelihood_function,
                                      return_all_stats=False)
        # these are used to help parallelize over multiple cores / MPI
        models._global_instance = model_call
        model_call = models._call_global_model
        prior_call = models._call_global_model_logprior
        self.pool = choose_pool(mpi=use_mpi, processes=nprocesses)
        # construct the sampler
        self._sampler = ptemcee.Sampler(nwalkers=nwalkers,
                                        ndim=ndim,
                                        logl=model_call,
                                        logp=prior_call,
                                        mapper=self.pool.map,
                                        **kwargs)
        self.nwalkers = nwalkers
        self._ntemps = ntemps
        self._checkpoint_interval = checkpoint_interval
        self._checkpoint_signal = checkpoint_signal
        # we'll initialize ensemble and chain to None
        self._chain = None
        self._ensemble = None
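
A short sketch of ptemcee.make_ladder, which builds the inverse-temperature ladder the class above passes in via betas:

import ptemcee

ndim = 4
# geometric ladder of 8 inverse temperatures tuned for ndim parameters;
# betas[0] == 1.0 is the cold, physical chain and the entries decrease
betas = ptemcee.make_ladder(ndim, ntemps=8)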
Example #9
    def run(self):
        """
        Run the parallel-tempering algorithm
        """

        # PREPARE FOR RUNNING

        # Define initial walkers population
        if self.p0 is not None:
            pass

        elif type(None) not in ( type(self.opt_data), type(self.ntemps), type(self.nwalkers), type(self.fbest)):
            # distributions using opt_data
            self.p0 = init_walkers(self.PSystem,distribution=self.distribution,
                                    opt_data=self.opt_data, ntemps=self.ntemps,
                                    nwalkers=self.nwalkers,fbest=self.fbest)
        elif type(None) not in ( type(self.ntemps), type(self.nwalkers) ):
            # Uniform distribution maybe?
            self.p0 = init_walkers(self.PSystem,distribution=self.distribution,
                                    ntemps=self.ntemps, nwalkers=self.nwalkers)
        else:
            raise NameError("Not enough information to initialize MCMC.\n\n" + 
                            "--> Provide an array using physical values through the 'p0' kwarg with shape (temperatures, walkers, dimensions)\n" +
                            "or\n" +
                            "--> Define: 'opt_data', 'fbest', 'ntemps', " +
                            "'nwalkers', and 'distribution' to initialize " +
                            "walkers from optimizers.")
        
        # Update ndim and nwalkers from p0 above
        self.ntemps, self.nwalkers, ndim_tmp = self.p0.shape

        # p0 is normalized? Or is it physical?
        if (self.p0 >= 0.).all() and (self.p0 <= 1.).all():
            # cube
            p0_norm = True  
            insert_cnst = False  
        else:
            # physical
            p0_norm = False 
            # If p0 is physical, then constants must be inserted  
            insert_cnst = True 

        # Check for consistency in input parameters
        if self.nwalkers < 2 * self.PSystem.ndim:
            raise RuntimeError(f"Number of walkers must be >= 2*ndim, i.e., " +
                f"nwalkers have to be >= {2 * self.PSystem.ndim}.")

        if ndim_tmp != self.PSystem.ndim:
            raise RuntimeError(f"Number of dimensions in 'PSystem' " +
            f"({self.PSystem.ndim}) differs from that in 'p0' ({ndim_tmp}).")
        
        # temperatures from betas
        if self.betas is not None:
            if len(self.betas) == self.ntemps:
                pass
            else:
                raise RuntimeError(f"Number of 'betas' ({self.betas}) differs"+
                f" from number of temperatures in 'p0' ({self.ntemps})")

        # Verify path exists
        if os.path.exists(self.path):
            pass
        else:
            raise RuntimeError(f"directory -path- {self.path} does not exist")

        # hdf5 file name to save mcmc data
        if self.file_name is not None:
            self.hdf5_filename =  f"{self.path}{self.file_name}{self.suffix}.hdf5"   
        else:
            self.hdf5_filename = f"{self.path}{self.PSystem.system_name}{self.suffix}.hdf5"


        # Time it
        ti = time.time()
        now = datetime.datetime.now()
        print("\n =========== PARALLEL-TEMPERING MCMC ===========\n")
        print("--> Starting date: ", now.strftime("%Y-%m-%d %H:%M"))
        print("--> Reference epoch of the solutions: ", self.PSystem.t0, " [JD]")
        print('--> Results will be saved at: ', self.hdf5_filename)
        print("--> MCMC parameters:")
        print(f"      -ntemps: {self.ntemps}")
        print(f"      -nwalkers: {self.nwalkers}")
        print(f"      -itmax: {self.itmax}")
        print(f"      -intra_steps: {self.intra_steps}")
        print()

        # Create an h5py file
        self._set_hdf5( self.PSystem, self.hdf5_filename)
        
        # Default values in ptemcee.
        # Do not change them unless you have read Vousden et al. (2016):
        _nu = 100. #/self.nwalkers 
        _t0 = 1000. #/self.nwalkers
        a_scale = 10

        # RUN
        with closing(Pool(processes=self.cores)) as pool:

            sampler = pt.Sampler(
                                nwalkers=self.nwalkers,
                                dim=self.PSystem.ndim,
                                logp=self.logprior, 
                                logl=log_likelihood_func,
                                ntemps=self.ntemps,
                                betas=self.betas,
                                adaptation_lag = _t0,
                                adaptation_time=_nu,
                                a=a_scale, 
                                Tmax=self.tmax,
                                pool=pool,
                                loglargs=(self.PSystem, 
                                            p0_norm, # cube
                                            insert_cnst), # insert_constants
                                logpkwargs={'psystem':self.PSystem}
                                )

            index = 0
            autocorr = np.empty( self.nsteps )
            
            # thin: The number of iterations to perform between saving the 
            # state to the internal chain.
            for iteration, s in enumerate(
                                sampler.sample(p0=self.p0, iterations=self.itmax, 
                                thin=self.intra_steps, storechain=True, 
                                adapt=True, swap_ratios=False)):

                # s[0] = walkers position
                # s[1] = log-posterior
                # s[2] = log-likelihood

                if (iteration + 1) % self.intra_steps:
                    continue
                
                # Identify current maximum a posteriori (MAP)
                max_value, max_index = max((x, (i, j))
                                for i, row in enumerate(s[1][:]) #MAP is calculated over the posterior
                                for j, x in enumerate(row))

                # get_autocorr_time, returns a matrix of autocorrelation 
                # lengths for each parameter in each temperature of shape 
                # ``(Ntemps, ndim)``.
                tau = sampler.get_autocorr_time()[0] # Take only colder temp
                mean_tau = np.mean(tau)
                # tswap_acceptance_fraction, returns an array of accepted 
                # temperature swap fractions for each temperature; 
                # shape ``(ntemps, )
                # nswap_accepted/nswap
                swap = list(sampler.tswap_acceptance_fraction)

                # acceptance_fraction, matrix of shape ``(Ntemps, Nwalkers)`` 
                # detailing the acceptance fraction for each walker.
                # nprop_accepted/nprop
                acc0 = sampler.acceptance_fraction[0,:]

                xbest = s[0][max_index[0]][max_index[1]]

                current_meanposterior = np.mean(s[1][0][:])
                current_meanlogl = np.mean(s[2][0][:])
                std_meanlogp = np.std(s[1][0][:]) 

                # Output in terminal
                if self.verbose:
                    print("--------- Iteration: ", iteration + 1)
                    print(" Mean tau Temp 0:", round(mean_tau, 3))
                    print(" Accepted swap fraction in Temp 0: ", round(swap[0],3))
                    print(" Mean acceptance fraction Temp 0: ", round(np.mean(acc0),3))
                    print(" Mean log-likelihood: ", round(current_meanlogl, 3))
                    print(" Mean log-posterior:  ", round(current_meanposterior, 3))
                    print(" Current log-posterior dispersion: ", round(std_meanlogp, 3))
                    print(" Current MAP: ", max_index,  round(max_value,3))
                
                autocorr[index] = mean_tau
                
                # Save data in hdf5 File
                # shape for chains is: (temps,walkers,steps,dim)
                # Is it worth saving temperatures other than 0?
                ta = time.time()  # Monitor the time wasted in saving..
                self._save_mcmc(self.hdf5_filename, 
                                sampler.chain[:,:,index,:], 
                                xbest, 
                                sampler.betas, 
                                autocorr, 
                                index, 
                                max_value, 
                                swap, 
                                max_index, 
                                iteration, 
                                current_meanposterior)
                                #current_meanlogl)
                if self.verbose:
                    print(f' Saving time: {(time.time() - ta) :.5f} sec')

                """
                                CONVERGENCE CRITERIA
                Write here your favorite convergence criteria 
                """
                ##geweke()

                if self.verbose:
                    print(' Elapsed time: ', round((time.time() - ti)/60.,4),'min')  

                index += 1              
                if (index+1)*self.intra_steps > self.itmax:
                    print('\n--> Maximum number of iterations reached in MCMC')
                    break				

        
        # Extract the best solutions from the hdf5 file and write them in ASCII
        extract_best_solutions(self.PSystem, self.hdf5_filename, write_file=True)

        print("--> Reference epoch of the solutions: ", self.PSystem.t0, " [JD]")
        print('--> Iterations performed: ', iteration +1)
        print('--> Elapsed time in MCMC:', round((time.time() - ti)/60.,4), 
                    'minutes')

        return sampler
Example #10
 def run_emcee(p, nwalkers, nsteps, ndim, multiT, convTest, pos, lnprob):
     
     """
      Run MCMC with:
          Number of walkers = nwalkers
          Number of dimensions = ndim
          Number of steps = nsteps
          Log probability function = lnprob
          Pool = p
          Initial walker positions = pos
          
     If multiT is true, MCMC will be run at 3 different temperatures (inverses 
     given by betas). If convTest is true, MCMC will either run until 
     convergence or for nsteps steps, whichever happens first.
     
     """
     
     if convTest: # walker paths will be stored in backend and periodically checked for convergence
         filename = headFile+".h5"
         backend = emcee.backends.HDFBackend(filename)
         backend.reset(nwalkers, ndim)
     
         sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, backend=backend, pool=p)
     
         max_n = nsteps
     
         # We'll track how the average autocorrelation time estimate changes
         index = 0
         autocorr = np.empty(max_n)
     
         old_tau = np.inf
     
         # Now we'll sample for up to max_n steps
         for sample in sampler.sample(pos, store=True, iterations=max_n, progress=True):
             # Only check convergence every 100 steps
             if sampler.iteration % 100:
                 continue
     
             # Compute the autocorrelation time so far
             # Using tol=0 means that we'll always get an estimate even if it isn't trustworthy
             tau = sampler.get_autocorr_time(tol=0)
             autocorr[index] = np.mean(tau)
             index += 1
     
             # Check convergence
             converged = np.all(tau * 100 < sampler.iteration)
             converged &= np.all(np.abs(old_tau - tau) / tau < 0.01)
             if converged:
                 break
             old_tau = tau
     
         nsteps = sampler.iteration
         # find mle_soln, the walker position with the maximum probability
         chain = sampler.chain
         probs = sampler.get_log_prob()
         maxprob=np.argmax(probs)
         hp_loc = np.unravel_index(maxprob, probs.shape)
         mle_soln = chain[(hp_loc[1],hp_loc[0])] # switching from order (nsteps,nwalkers) to (nwalkers,nsteps)
         print(mle_soln)
         return nsteps, chain, mle_soln, probs, sampler
 
 
     elif multiT:
         betas = np.asarray([0.01, 0.505, 1.0]) # inverse temperatures for log-likelihood
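         # note: lnprior is not an argument of run_emcee; it is assumed
         # to be defined at module scope alongside this function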
         sampler = ptemcee.Sampler(nwalkers, ndim, lnprob, lnprior, betas=betas, pool=p)
         sampler.run_mcmc(pos, nsteps)
         # find mle_soln, the walker position with the maximum probability
         chain = sampler.chain[2][:,:,:]
         probs = sampler.logprobability[2]
         maxprob = np.argmax(probs)
         hp_loc = np.unravel_index(maxprob, probs.shape)
         mle_soln = chain[hp_loc] # already in order (nwalkers,nsteps)
         print(mle_soln)
         return nsteps, chain, mle_soln, probs, sampler
         
     else:
         sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, pool=p)
         sampler.run_mcmc(pos, nsteps, store=True)
         # find mle_soln, the walker position with the maximum probability
         chain = sampler.chain
         probs = sampler.get_log_prob()
         maxprob = np.argmax(probs)
         hp_loc = np.unravel_index(maxprob, probs.shape)
         mle_soln = chain[(hp_loc[1],hp_loc[0])] # switching from order (nsteps,nwalkers) to (nwalkers,nsteps)
         print(mle_soln)
         return nsteps, chain, mle_soln, probs, sampler
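
The hp_loc index swaps in the emcee branches above come from a difference in axis order. Here is a compact equivalent, assuming emcee's layouts (sampler.chain is (nwalkers, nsteps, ndim); sampler.get_log_prob() is (nsteps, nwalkers)):

# probs = sampler.get_log_prob()  -> (nsteps, nwalkers)
# chain = sampler.chain           -> (nwalkers, nsteps, ndim)
step, walker = np.unravel_index(np.argmax(probs), probs.shape)
mle_soln = chain[walker, step]    # swap (step, walker) -> (walker, step)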
Example #11
    def run_sampler(self,
                    total_orbits,
                    burn_steps=0,
                    thin=1,
                    examine_chains=False,
                    output_filename=None,
                    periodic_save_freq=None):
        """
        Runs PT MCMC sampler. Results are stored in ``self.chain`` and ``self.lnlikes``.
        Results also added to ``orbitize.results.Results`` object (``self.results``)

        .. Note:: Can be run multiple times if you want to pause and inspect things.
            Each call will continue from the end state of the last execution.

        Args:
            total_orbits (int): total number of accepted possible
                orbits that are desired. This equals
                ``num_steps_per_walker`` x ``num_walkers``
            burn_steps (int): optional parameter to tell the sampler
                to discard a certain number of steps at the beginning
            thin (int): factor to thin the steps of each walker
                by to remove correlations in the walker steps
            examine_chains (boolean): if True, display plots of the walkers
                at each step by running `examine_chains` after `total_orbits`
                have been sampled.
            output_filename (str): Optional filepath for where results file can be saved.
            periodic_save_freq (int): Optionally, save the current results into ``output_filename``
                every nth step while running, where n is value passed into this variable. 

        Returns:
            ``emcee.sampler`` object: the sampler used to run the MCMC
        """

        if periodic_save_freq is not None and output_filename is None:
            raise ValueError(
                "output_filename must be defined for periodic saving of the chains"
            )
        if periodic_save_freq is not None and not isinstance(
                periodic_save_freq, int):
            raise TypeError("periodic_save_freq must be an integer")

        nsteps = int(np.ceil(total_orbits / self.num_walkers))
        if nsteps <= 0:
            raise ValueError("Total_orbits must be greater than num_walkers.")

        if self.use_pt:
            sampler = ptemcee.Sampler(self.num_walkers,
                                      self.num_params,
                                      self._logl,
                                      orbitize.priors.all_lnpriors,
                                      ntemps=self.num_temps,
                                      threads=self.num_threads,
                                      logpargs=[
                                          self.priors,
                                      ])
        else:
            if self.num_threads != 1:
                print(
                    'Setting num_threads=1. If you want parallel processing for emcee implemented in orbitize, let us know.'
                )
                self.num_threads = 1

            sampler = emcee.EnsembleSampler(self.num_walkers,
                                            self.num_params,
                                            self._logl,
                                            kwargs={'include_logp': True})

        print("Starting Burn in")
        for i, state in enumerate(
                sampler.sample(self.curr_pos, iterations=burn_steps,
                               thin=thin)):
            if self.use_pt:
                self.curr_pos = state[0]
            else:
                self.curr_pos = state.coords

            if (i + 1) % 5 == 0:
                print(str(i + 1) + '/' + str(burn_steps) +
                      ' steps of burn-in complete',
                      end='\r')

            if periodic_save_freq is not None:
                if (i + 1
                    ) % periodic_save_freq == 0:  # we've completed i+1 steps
                    self.results.curr_pos = self.curr_pos
                    self.results.save_results(output_filename)

        sampler.reset()
        print('')
        print('Burn in complete. Sampling posterior now.')

        saved_upto = 0  # number of steps saved so far, i.e. the next index that needs to be saved
        for i, state in enumerate(
                sampler.sample(self.curr_pos, iterations=nsteps, thin=thin)):
            if self.use_pt:
                self.curr_pos = state[0]
            else:
                self.curr_pos = state.coords

            # print progress statement
            if (i + 1) % 5 == 0:
                print(str(i + 1) + '/' + str(nsteps) + ' steps completed',
                      end='\r')

            if periodic_save_freq is not None:
                if (i + 1
                    ) % periodic_save_freq == 0:  # we've completed i+1 steps
                    self._update_chains_from_sampler(sampler, num_steps=i + 1)

                    # figure out what is the new chunk of the chain and corresponding lnlikes that have been computed before last save
                    # grab the current posterior and lnlikes and reshape them to have the Nwalkers x Nsteps dimension again
                    post_shape = self.post.shape
                    curr_chain_shape = (self.num_walkers,
                                        post_shape[0] // self.num_walkers,
                                        post_shape[-1])
                    curr_chain = self.post.reshape(curr_chain_shape)
                    curr_lnlike_chain = self.lnlikes.reshape(
                        curr_chain_shape[:2])
                    # use the reshaped arrays and find the new steps we computed
                    curr_chunk = curr_chain[:, saved_upto:i + 1]
                    curr_chunk = curr_chunk.reshape(
                        -1,
                        curr_chunk.shape[-1])  # flatten nwalkers x nsteps dim
                    curr_lnlike_chunk = curr_lnlike_chain[:, saved_upto:i +
                                                          1].flatten()

                    # add this current chunk to the results object (which already has all the previous chunks saved)
                    self.results.add_samples(curr_chunk,
                                             curr_lnlike_chunk,
                                             labels=self.system.labels,
                                             curr_pos=self.curr_pos)
                    self.results.save_results(output_filename)
                    saved_upto = i + 1

        print('')
        self._update_chains_from_sampler(sampler)

        if periodic_save_freq is None:
            # need to save everything
            self.results.add_samples(self.post,
                                     self.lnlikes,
                                     labels=self.system.labels,
                                     curr_pos=self.curr_pos)
        elif saved_upto < nsteps:
            # just need to save the last few
            # same code as above except we just need to grab the last few
            post_shape = self.post.shape
            curr_chain_shape = (self.num_walkers,
                                post_shape[0] // self.num_walkers,
                                post_shape[-1])
            curr_chain = self.post.reshape(curr_chain_shape)
            curr_lnlike_chain = self.lnlikes.reshape(curr_chain_shape[:2])
            curr_chunk = curr_chain[:, saved_upto:]
            curr_chunk = curr_chunk.reshape(
                -1, curr_chunk.shape[-1])  # flatten nwalkers x nsteps dim
            curr_lnlike_chunk = curr_lnlike_chain[:, saved_upto:].flatten()

            self.results.add_samples(curr_chunk,
                                     curr_lnlike_chunk,
                                     labels=self.system.labels,
                                     curr_pos=self.curr_pos)

        if output_filename is not None:
            self.results.save_results(output_filename)

        print('Run complete')

        if examine_chains:
            self.examine_chains()

        return sampler
Example #12
    def throw_darts(self, nburn=1000, nsteps=1000, method='emcee'):
        """
        Run the sampler.

        Args:
            nburn : int (default: 1000), number of burn-in steps.
            nsteps : int (default: 1000), number of steps to be saved.
            method : str (default: 'emcee'), sampling method; switched to
                'emcee_PT' below whenever self.ntemps is set.
        """

        # To allow for PT sampling
        if self.ntemps is not None:

            try:
                import ptemcee
            except ImportError:
                raise ImportError(
                    "You must pip install ptemcee to run the parallel-tempering MCMC method"
                )

            method = 'emcee_PT'

        if method == 'emcee':

            # Define sampler
            if self.pool is not None:
                sampler = emcee.EnsembleSampler(
                    self.nwalkers,
                    self.dim,
                    self.posterior_function,
                    args=[self],
                    blobs_dtype=posterior.blobs_dtype,
                    moves=self.emcee_moves,
                    pool=self.pool)
                self.pool = None
            elif self.threads != 1:
                sampler = emcee.EnsembleSampler(
                    self.nwalkers,
                    self.dim,
                    self.posterior_function,
                    args=[self],
                    blobs_dtype=posterior.blobs_dtype,
                    moves=self.emcee_moves,
                    threads=self.threads)
            else:
                sampler = emcee.EnsembleSampler(
                    self.nwalkers,
                    self.dim,
                    self.posterior_function,
                    blobs_dtype=posterior.blobs_dtype,
                    args=[self],
                    moves=self.emcee_moves)

            # Burn-in
            print(self.p0.shape)
            print("Starting burn-in...")
            pos = sampler.run_mcmc(self.p0, nburn)
            # pos,prob,state,binary_data = sampler.run_mcmc(self.p0, nburn)
            print("...finished running burn-in")

            # Full run
            print("Starting full run...")
            sampler.reset()
            sampler.run_mcmc(pos, nsteps)
            # pos,prob,state,binary_data = sampler.run_mcmc(pos, nsteps)
            print("...full run finished")

            # Save only every self.thin-th sample
            self.chains = sampler.chain[:, ::self.thin, :]
            self.derived = np.swapaxes(np.array(sampler.blobs), 0,
                                       1)[:, ::self.thin]
            # self.derived = np.swapaxes(np.array(sampler.blobs), 0, 1)[:,::self.thin,0,:]
            self.lnprobability = sampler.lnprobability[:, ::self.thin]

            self.sampler = sampler

        elif method == 'emcee_PT':

            # Define sampler
            if self.pool is not None:
                sampler = ptemcee.Sampler(self.nwalkers,
                                          self.dim,
                                          self.ln_likelihood_function,
                                          self.ln_prior_function,
                                          ntemps=self.ntemps,
                                          Tmax=self.Tmax,
                                          blobs_dtype=posterior.blobs_dtype,
                                          loglargs=(self, ),
                                          logpargs=(self, ),
                                          pool=self.pool)
                self.pool = None
            else:
                sampler = ptemcee.Sampler(self.nwalkers,
                                          self.dim,
                                          self.ln_likelihood_function,
                                          self.ln_prior_function,
                                          ntemps=self.ntemps,
                                          Tmax=self.Tmax,
                                          blobs_dtype=posterior.blobs_dtype,
                                          loglargs=(self, ),
                                          logpargs=(self, ))

            # Burn-in
            print("Starting burn-in...")
            for pos, prob, state in sampler.sample(self.p0, iterations=nburn):
                pass
            print("...finished running burn-in")

            # Full run
            print("Starting full run...")
            sampler.reset()
            for pos, prob, state in sampler.sample(pos,
                                                   iterations=nsteps,
                                                   thin=self.thin):
                pass
            print("...full run finished")

            self.chains = sampler.chain
            self.derived = sampler.blobs
            self.lnprobability = sampler.logprobability
            self.sampler = sampler

        elif method == 'nestle':

            print("Nested sampling is not yet implemented.")

        else:
            print("Your chosen method is not supported by dart_board.")
Example #13
    def sample(self):
        '''
        Run the MCMC.
        '''
        # First make sure that the maximum likelihood params are fitted
        if not self.minimized:
            self.approximate_ml()
        # print(self.params_all)

        ndim, nwalkers = len(self.params_vary), self.config['NWALKERS']
        p0 = np.zeros((nwalkers, len(self.params_vary)))
        pml = [self.params_all[pname] for pname in self.params_vary]

        for pnum, pname in enumerate(self.params_vary):
            p0[:, pnum] = (np.random.randn(nwalkers)
                           * self.config['SAMPLE_BALL'] + 1.) * pml[pnum]

        plist = list(self.params_vary.keys())

        args = (self.freqs, self.tb_meas, self.var_tb, self.params_all, plist,
                self.params_vary, self.fg_model, self.sig_model)

        if self.config['MPI']:
            from emcee.utils import MPIPool
            pool = MPIPool()

            if not pool.is_master():
                pool.wait()
                sys.exit(0)

            # create the sampler on the master process only; the original
            # placed this construction after sys.exit(0), where it could
            # never execute
            self.sampler = emcee.EnsembleSampler(nwalkers,
                                                 ndim,
                                                 lnprob,
                                                 args=args,
                                                 pool=pool)

            self.sampler.run_mcmc(p0, self.config['NBURN'])  # burn in

            p0 = self.sampler.chain[:, -1, :].squeeze()

            self.sampler.reset()
            self.sampler.run_mcmc(p0, self.config['NSTEPS'])
            pool.close()
        else:
            if self.config['SAMPLER'] == 'PARALLELTEMPERING':
                logl = lambda x: lnlike(
                    x, self.freqs, self.tb_meas, self.var_tb, self.params_all,
                    self.params_vary, self.fg_model, self.sig_model)

                logp = lambda x: lnprior(x, self.params_vary.keys(),
                                         self.params_vary)

                self.sampler = ptemcee.Sampler(
                    ntemps=self.config['NTEMPS'],
                    nwalkers=self.config['NWALKERS'],
                    dim=self.ndim,
                    logl=logl,
                    logp=logp)
            else:
                self.sampler = emcee.EnsembleSampler(
                    nwalkers=self.config['NWALKERS'],
                    ndim=ndim,
                    log_prob_fn=lnprob,
                    args=args,
                    threads=self.config['THREADS'])

            # If we use PT sampling, we need a further dimension of
            # start parameters for the different temperatures
            if self.config['SAMPLER'] == 'PARALLELTEMPERING':
                p0 = np.array([p0 for m in range(self.config['NTEMPS'])])

            # Run the MCMC for the burn-in
            self.sampler.run_mcmc(p0,
                                  self.config['NBURN'],
                                  thin=self.config['NTHIN'])

            # Reset after burn-in and run the full chain
            if self.config['SAMPLER'] == 'PARALLELTEMPERING':
                p0 = self.sampler.chain[:, :, -1, :]
            else:
                p0 = self.sampler.chain[:, -1, :].squeeze()
            self.sampler.reset()
            self.sampler.run_mcmc(p0,
                                  self.config['NSTEPS'],
                                  thin=self.config['NTHIN'])

        # Create output directory
        if not os.path.exists(self.config['PROJECT_NAME']):
            os.makedirs(self.config['PROJECT_NAME'])

        # Save output and configuration
        with open(os.path.join(self.config['PROJECT_NAME'], 'config.yaml'),
                  'w') as f:
            yaml.dump(self.config, f, default_flow_style=False)

        with open(os.path.join(self.config['PROJECT_NAME'], 'ml_params.yaml'),
                  'w') as f:
            yaml.dump(self.params_all, f, default_flow_style=False)

        self.sampled = True

        # Collect result parameters
        ###########################
        resultdict = {}

        # Chain
        #######
        resultdict['chain'] = self.sampler.chain

        # Conservative evidence
        #######################
        if (self.config['COMPUTECOVARIANCE'] and
                self.config['SAMPLER'] == 'ENSEMBLESAMPLER'):

            # Estimate autocorrelation
            self.acors = self.sampler.acor.astype(int)
            resultdict['autocorrs'] = self.acors

            # Estimate covariance
            self.cov_samples = np.zeros(
                (len(self.params_vary), len(self.params_vary)))
            resultdict['cov_samples'] = self.cov_samples

            for i in range(len(self.params_vary)):
                for j in range(len(self.params_vary)):
                    stepsize = np.max([self.acors[i], self.acors[j]])
                    # chain has shape (nwalkers, nsteps, ndim): thin along
                    # steps by stepsize and pick out parameters i and j
                    csample_i = self.sampler.chain[:, ::stepsize, i].flatten()
                    csample_j = self.sampler.chain[:, ::stepsize, j].flatten()
                    self.cov_samples[i, j] = np.mean(
                        (csample_i - csample_i.mean()) *
                        (csample_j - csample_j.mean()))

            # Compute conservative evidence without prior factor
            self.conservative_evidence = np.exp(self.ln_ml) / np.sqrt(
                np.linalg.det(self.cov_samples))
            resultdict['conservative_evidence'] = self.conservative_evidence

        # Evidence from thermodynamic integration with the PT sampler
        #############################################################
        if self.config['SAMPLER'].lower() == 'paralleltempering':
            # burn-in samples were already discarded by reset(), so no
            # further burn-in fraction is needed here
            self.logz, self.dlogz = self.sampler.log_evidence_estimate(
                fburnin=0.)

            resultdict['log_thd_evidence'] = self.logz
            resultdict['dlog_thd_evidence'] = self.dlogz

        # Posterior mean
        # The posterior mean values of the parameters
        ###############
        post_mean_vals = np.mean(self.sampler.flatchain, axis=0)
        resultdict['post_mean_vals'] = post_mean_vals

        # Log posterior
        # The log posterior evaluated at the posterior-mean parameters
        ###############
        logL = self.sampler.log_prob_fn(post_mean_vals)
        resultdict['logL'] = logL

        # Save as .npz
        np.savez(os.path.join(self.config['PROJECT_NAME'], 'output.npz'),
                 **resultdict)
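The covariance loop above only works because the parameter index sits on the last axis of the chain: an emcee ensemble chain is laid out as (nwalkers, nsteps, ndim). A minimal standalone sketch of the same thinned-covariance estimate (the name thinned_cov and the integer acors argument are illustrative, not part of the original class):

import numpy as np

def thinned_cov(chain, acors):
    """Sample covariance, thinning each parameter pair by the larger
    of the two integrated autocorrelation times.

    chain : array, shape (nwalkers, nsteps, ndim)
    acors : int array, shape (ndim,)
    """
    ndim = chain.shape[-1]
    cov = np.zeros((ndim, ndim))
    for i in range(ndim):
        for j in range(ndim):
            step = max(acors[i], acors[j])
            si = chain[:, ::step, i].ravel()
            sj = chain[:, ::step, j].ravel()
            cov[i, j] = np.mean((si - si.mean()) * (sj - sj.mean()))
    return cov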
Example #14
    def run_sampler(self, total_orbits, burn_steps=0, thin=1):
        """
        Runs PT MCMC sampler. Results are stored in ``self.chain`` and ``self.lnlikes``.
        Results also added to ``orbitize.results.Results`` object (``self.results``)

        .. Note:: Can be run multiple times if you want to pause and inspect things.
            Each call will continue from the end state of the last execution.

        Args:
            total_orbits (int): total number of accepted possible
                orbits that are desired. This equals
                ``num_steps_per_walker`` x ``num_walkers``
            burn_steps (int): optional parameter telling the sampler
                to discard a certain number of steps at the beginning
            thin (int): factor by which to thin the steps of each
                walker to remove correlations between them

        Returns:
            ``emcee.sampler`` object: the sampler used to run the MCMC
        """

        if self.use_pt:
            sampler = ptemcee.Sampler(self.num_walkers,
                                      self.num_params,
                                      self._logl,
                                      orbitize.priors.all_lnpriors,
                                      ntemps=self.num_temps,
                                      threads=self.num_threads,
                                      logpargs=[
                                          self.priors,
                                      ])
        else:
            sampler = emcee.EnsembleSampler(self.num_walkers,
                                            self.num_params,
                                            self._logl,
                                            threads=self.num_threads,
                                            kwargs={'include_logp': True})

        for pos, lnprob, lnlike in sampler.sample(self.curr_pos,
                                                  iterations=burn_steps,
                                                  thin=thin):
            pass

        sampler.reset()
        try:
            self.curr_pos = pos
        except UnboundLocalError:  # 0 step burn-in (pos is not defined)
            pass
        print('Burn in complete')

        nsteps = int(np.ceil(total_orbits / self.num_walkers))

        assert nsteps > 0, 'total_orbits must be a positive number.'

        i = 0
        for pos, lnprob, lnlike in sampler.sample(p0=self.curr_pos,
                                                  iterations=nsteps,
                                                  thin=thin):
            i += 1
            # print progress statement
            if i % 5 == 0:
                print(str(i) + '/' + str(nsteps) + ' steps completed',
                      end='\r')
        print('')

        self.curr_pos = pos

        # TODO: Need something here to pick out temperatures, just using lowest one for now
        self.chain = sampler.chain

        if self.use_pt:
            self.post = sampler.flatchain[0, :, :]
            # should also be picking out the lowest-temperature logps
            self.lnlikes = sampler.logprobability[0, :, :].flatten()
            self.lnlikes_alltemps = sampler.logprobability
        else:
            self.post = sampler.flatchain
            self.lnlikes = sampler.lnprobability

        # include fixed parameters in posterior
        self.post = self._fill_in_fixed_params(self.post)

        self.results.add_samples(self.post, self.lnlikes)

        print('Run complete')

        return sampler
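The TODO above keeps the full chain but only uses the lowest temperature downstream. A minimal sketch of that extraction, assuming ptemcee's chain layout of (ntemps, nwalkers, nsteps, ndim) (the helper name cold_chain_samples is illustrative):

def cold_chain_samples(sampler):
    # temperature index 0 is beta = 1, i.e. the untempered posterior
    chain = sampler.chain  # (ntemps, nwalkers, nsteps, ndim)
    return chain[0].reshape(-1, chain.shape[-1])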
Example #15
        print("1 sigma spread", sigma1_1)
        print("2 sigma spread", sigma2_1)

        quantiles = np.percentile(sampler.flatchain[:, 1],
                                  [2.28, 15.9, 50, 84.2, 97.7])
        sigma1_2 = 0.5 * (quantiles[3] - quantiles[1])
        sigma2_2 = 0.5 * (quantiles[4] - quantiles[0])
        print("1 sigma spread", sigma1_2)
        print("2 sigma spread", sigma2_2)

    elif multiT:
        # inverse temperatures for tempering the log-likelihood
        betas = np.asarray([0.01, 0.505, 1.0])
        sampler = ptemcee.Sampler(nwalkers,
                                  ndim,
                                  lnprob,
                                  lnprior,
                                  betas=betas,
                                  threads=3)
        sampler.run_mcmc(pos, nsteps)
        # keep the chain of the third ladder entry (beta = 1.0, the
        # untempered posterior)
        chain = sampler.chain[2]
    else:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)
        sampler.run_mcmc(pos, nsteps)
        chain = sampler.chain
        probs = sampler.get_log_prob()

        maxprob = np.argmax(probs)
        hp_loc = np.unravel_index(maxprob, probs.shape)
        # probs is ordered (nsteps, nwalkers) but chain is
        # (nwalkers, nsteps, ndim), so swap the indices
        mle_soln = chain[hp_loc[1], hp_loc[0]]
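In emcee 3 the get_chain() accessor returns (nsteps, nwalkers, ndim), matching get_log_prob(), so the index swap above can be avoided entirely. A sketch of the same MAP lookup (continuing from the sampler above; assuming emcee 3):

chain3 = sampler.get_chain()        # (nsteps, nwalkers, ndim)
log_prob = sampler.get_log_prob()   # (nsteps, nwalkers)
step, walker = np.unravel_index(np.argmax(log_prob), log_prob.shape)
mle_soln = chain3[step, walker]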
Example #16
def samplePtemcee(t,
                  y,
                  ye,
                  mup,
                  sigp,
                  Tmax,
                  nwalkers=100,
                  nsteps=1000,
                  nburn=None,
                  ntemps=21,
                  sampleFile=None,
                  maxTemp=np.inf):

    ndim = len(mup)
    ndata = len(t)

    if nburn is None:
        nburn = nsteps // 4

    doTheSampling = True
    betas = None

    if sampleFile is not None:
        with h5.File(sampleFile, "a") as f:
            if ('ptemcee/chain' in f and 'ptemcee/lnprobability' in f
                    and 'ptemcee/lnlikelihood' in f and 'ptemcee/betas' in f):
                chain = f['ptemcee/chain'][...]
                lnprobability = f['ptemcee/lnprobability'][...]
                lnlikelihood = f['ptemcee/lnlikelihood'][...]
                betas = f['ptemcee/betas'][...]
                try:
                    assert chain.shape == (ntemps, nwalkers, nsteps, ndim)
                    assert lnprobability.shape == (ntemps, nwalkers, nsteps)
                    assert lnlikelihood.shape == (ntemps, nwalkers, nsteps)
                    assert betas.shape == (ntemps, )
                    samps = chain[0].reshape((-1, ndim))
                    lnprobs = lnprobability[0].reshape((-1,))
                    lnlikes = lnlikelihood
                    doTheSampling = False
                except AssertionError:
                    pass

    if doTheSampling:

        if betas is None:
            betas = ptemcee.make_ladder(ndim, ntemps, maxTemp)

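        # adaptive=True lets ptemcee adjust the temperature ladder on the
        # fly during sampling; the final ladder is read back from
        # sampler.betas after the run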
        sampler = ptemcee.Sampler(nwalkers,
                                  ndim,
                                  loglike,
                                  logprior,
                                  logl_args=(t, y, ye),
                                  logp_args=(mup, sigp),
                                  betas=betas,
                                  adaptive=True)

        p0 = mup[None, None, :] + np.random.normal(0.0, 1.0e-4,
                                                   (ntemps, nwalkers, ndim))

        if nburn > 0:
            for i, result in enumerate(
                    sampler.sample(p0, iterations=nburn, storechain=False)):
                print("Burn in {0:d} steps: {1:.1f}%".format(
                    nburn, 100 * (i + 1) / nburn),
                      end='\r')
            print('')
            sampler.reset()
        else:
            # no burn-in: wrap p0 so it unpacks like a sample() result below
            result = (p0,)

        for i, result in enumerate(
                sampler.sample(*result, iterations=nsteps, storechain=True)):
            print("Sampling {0:d} steps: {1:.1f}%".format(
                nsteps, 100 * (i + 1) / nsteps),
                  end='\r')
        print('')

        chain = sampler.chain
        samps = sampler.flatchain[0]
        lnprobs = sampler.lnprobability[0].reshape((-1, ))
        lnlikes = sampler.lnlikelihood
        betas = sampler.betas

        if sampleFile is not None:
            f = h5.File(sampleFile, 'a')
            if 'ptemcee/chain' in f:
                f['ptemcee/chain'].resize(sampler.chain.shape)
                f['ptemcee/chain'][...] = sampler.chain[...]
            else:
                f.create_dataset('ptemcee/chain',
                                 data=sampler.chain,
                                 maxshape=(None, None, None, None))

            if 'ptemcee/lnprobability' in f:
                f['ptemcee/lnprobability'].resize(sampler.lnprobability.shape)
                f['ptemcee/lnprobability'][...] = sampler.lnprobability[...]
            else:
                f.create_dataset('ptemcee/lnprobability',
                                 data=sampler.lnprobability,
                                 maxshape=(None, None, None))

            if 'ptemcee/lnlikelihood' in f:
                f['ptemcee/lnlikelihood'].resize(sampler.lnlikelihood.shape)
                f['ptemcee/lnlikelihood'][...] = sampler.lnlikelihood[...]
            else:
                f.create_dataset('ptemcee/lnlikelihood',
                                 data=sampler.lnlikelihood,
                                 maxshape=(None, None, None))

            if 'ptemcee/betas' in f:
                f['ptemcee/betas'].resize(betas.shape)
                f['ptemcee/betas'][...] = betas[...]
            else:
                f.create_dataset('ptemcee/betas',
                                 data=betas,
                                 maxshape=(None, ))
            f.close()

    labels = ['C{0:01d}'.format(i) for i in range(ndim)]

    fig = corner.corner(samps, labels=labels)
    figname = "emceePT_corner.png"
    print("Saving", figname)
    fig.savefig(figname)
    plt.close(fig)

    # trace plots for the coldest and hottest temperatures only
    for k in [0, ntemps - 1]:
        for i in range(ndim):
            fig, ax = plt.subplots(1, 1, figsize=(8, 4))
            for j in range(nwalkers):
                ax.plot(chain[k, j, :, i], alpha=2.0 / nwalkers, color='k')
            ax.set_xlabel('# Iterations')
            ax.set_ylabel(labels[i])

            figname = "emceePT_trace_T{0:01d}_{1:s}.png".format(k, labels[i])
            print("Saving", figname)
            fig.savefig(figname)
            plt.close(fig)

    imap = lnprobs.argmax()

    taus = autocorr.integrated_time(chain, timeAxis=2, walkerAxis=1)
    lnlike_taus = autocorr.integrated_time(lnlikes, timeAxis=2, walkerAxis=1)
    print("emceePT AutoCorrTau:", taus)
    print("emceePT AutoCorrTau logLike:", lnlike_taus)

    lnlike_adj = lnlikes - lnlikes.mean(axis=(1, 2), keepdims=True)
    lnlike_var = (lnlike_adj * lnlike_adj).mean(axis=(1, 2))

    xmap = samps[imap]
    means = samps.mean(axis=0)
    diffs = samps - means
    cov = (diffs[:, :, None] * diffs[:, None, :]).mean(axis=0)

    # order by increasing beta for the thermodynamic integral
    avglnl = lnlikes.mean(axis=(1, 2))[::-1]
    avglnl_err = np.sqrt(lnlike_taus / (nsteps * nwalkers) * lnlike_var)[::-1]
    betas = betas[::-1]
    if betas[0] > 0.0:
        # extend the ladder to beta = 0 so the integrand covers [0, 1],
        # reusing the hottest chain's mean log-likelihood
        betas = np.concatenate(([0.0], betas))
        avglnl = np.concatenate(([avglnl[0]], avglnl))
        avglnl_err = np.concatenate(([avglnl_err[0]], avglnl_err))

    return xmap, means, cov, samps, lnprobs, avglnl, betas, avglnl_err
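The (betas, avglnl, avglnl_err) arrays returned above are arranged for thermodynamic integration of the evidence, ln Z = \int_0^1 <ln L>_beta d(beta). A minimal sketch of that final step, under the assumption that this is what the caller does with them (the name thermodynamic_lnZ is illustrative):

import numpy as np

def thermodynamic_lnZ(betas, avglnl, avglnl_err):
    # trapezoid-rule estimate of the integral over beta in [0, 1]
    lnZ = np.trapz(avglnl, betas)
    # crude error propagation using approximate trapezoid weights
    w = np.gradient(betas)
    dlnZ = np.sqrt(np.sum((w * avglnl_err) ** 2))
    return lnZ, dlnZ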