def logp(self, pvals=None):
    """
    Calculate the log-prior of the system

    Parameters
    ----------
    pvals : array-like or refnx.analysis.Parameters
        values for the varying or entire set of parameters

    Returns
    -------
    logp : float
        log-prior probability

    Notes
    -----
    The log-prior is calculated as:

    .. code-block:: python

        logp = np.sum(param.logp() for param in
                      self.varying_parameters())

    """
    self.setp(pvals)

    logp = np.sum([param.logp() for param in
                   f_unique(p for p in flatten(self.parameters)
                            if p.vary)])

    if not np.isfinite(logp):
        return -np.inf

    return logp
def nvary(self): """ Returns ------- nvary : int The number of :class:`Parameter` contained in this object that are allowed to vary. """ return len([1 for param in f_unique(flatten(self.data)) if param.vary])
def constrained_parameters(self): """ Returns ------- constrained_parameters : list A list of unique :class:`Parameter` contained in this object that have constraints. """ return [param for param in f_unique(flatten(self.data)) if param.constraint is not None]
def logp(self): """ Calculates logp for all the parameters Returns ------- logp : float Log probability for all the parameters """ # logp for all the parameters return np.sum([param.logp() for param in f_unique(flatten(self.data)) if param.vary])
def varying_parameters(self): """ Unique list of varying parameters Returns ------- p : list Unique list of varying parameters """ p = [param for param in f_unique(flatten(self.data)) if param.vary] q = Parameters() q.data = p return q
def varying_parameters(self): """ Returns ------- varying_parameters : refnx.analysis.Parameters The varying Parameter objects allowed to vary during the fit. """ # create and return a Parameters object because it has the # __array__ method, which allows one to quickly get numerical values. p = Parameters() p.data = list(f_unique(p for p in flatten(self.parameters) if p.vary)) return p
def pvals(self, pvals):
    varying = [param for param in f_unique(flatten(self.data))
               if param.vary]
    if np.size(pvals) == len(varying):
        for i, param in enumerate(varying):
            param.value = pvals[i]
        return

    flattened_parameters = list(flatten(self.data))
    if np.size(pvals) == len(flattened_parameters):
        for i, param in enumerate(flattened_parameters):
            param.value = pvals[i]
        return

    raise ValueError("You supplied the wrong number of values %d when "
                     "setting this Parameters.pvals attribute"
                     % len(pvals))
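# The pvals setter above accepts either one value per *varying* parameter or
# one value per parameter in the flattened hierarchy. A sketch, assuming pvals
# is exposed as a property on Parameters (values are hypothetical):
from refnx.analysis import Parameter, Parameters

a = Parameter(1.0, name="a", vary=True)
b = Parameter(2.0, name="b", vary=False)
c = Parameter(3.0, name="c", vary=True)
p = Parameters([a, b, c])

p.pvals = [10.0, 30.0]      # two values -> only the varying a and c change
assert (a.value, b.value, c.value) == (10.0, 2.0, 30.0)

p.pvals = [1.0, 2.0, 3.0]   # three values -> every parameter is set
assert (a.value, b.value, c.value) == (1.0, 2.0, 3.0)
# any other length raises ValueError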
def flattened(self, unique=False): """ A list of all the :class:`Parameter` contained in this object, including those contained within :class:`Parameters` at any depth. Parameters ---------- unique : bool The list will only contain unique objects. Returns ------- params : list A list of :class:`Parameter` contained in this object. """ if unique: return list(f_unique(flatten(self.data))) else: return list(flatten(self.data))
def bounds(self):
    bounds = list(
        (p.bounds.lb, p.bounds.ub)
        for p in f_unique(flatten(self.objective.parameters))
        if p.vary
    )
    self.lowerBounds = np.array(list(bound[0] for bound in bounds))
    self.upperBounds = np.array(list(bound[1] for bound in bounds))
def __contains__(self, item):
    return id(item) in [id(p) for p in f_unique(flatten(self.data))]
def __contains__(self, item): """ Does this instance contain a given :class:`Parameter` """ return id(item) in [id(p) for p in f_unique(flatten(self.data))]
def constrained_parameters(self): """ List of constrained parameters """ return [param for param in f_unique(flatten(self.data)) if param.constraint is not None]
def nvary(self):
    return np.sum([1 for param in f_unique(flatten(self.data))
                   if param.vary])
def flattened(self, unique=False):
    if unique:
        return list(f_unique(flatten(self.data)))
    else:
        return list(flatten(self.data))
def fit(self, method="L-BFGS-B", target="nll", verbose=True, **kws): """ Obtain the maximum log-likelihood, or log-posterior, estimate (mode) of the objective. Maximising the log-likelihood is equivalent to minimising chi2 in a least squares fit. Parameters ---------- method : str which method to use for the optimisation. One of: - `'least_squares'`: :func:`scipy.optimize.least_squares`. - `'L-BFGS-B'`: L-BFGS-B. - `'differential_evolution'`: :func:`scipy.optimize.differential_evolution` - `'dual_annealing'`: :func:`scipy.optimize.dual_annealing` (SciPy >= 1.2.0) - `'shgo'`: :func:`scipy.optimize.shgo` (SciPy >= 1.2.0) You can also choose many of the minimizers from :func:`scipy.optimize.minimize`. target : {'nll', 'nlpost'}, optional Minimize the negative log-likelihood (`'nll'`) or the negative log-posterior (`'nlpost'`). This is equivalent to maximising the likelihood or posterior probabilities respectively. Maximising the likelihood is equivalent to minimising chi^2 in a least-squares fit. This option only applies to the `differential_evolution`, `shgo`, `dual_annealing` or `L-BFGS-B` methods. These optimisers require lower and upper (box) bounds for each parameter. If the `Bounds` on a parameter are not an `Interval`, but a `PDF` specifying a statistical distribution, then the lower and upper bounds are approximated as ``PDF.rv.ppf([0.005, 0.995])``, covering 99 % of the statistical distribution. verbose : bool, optional Gives fitting progress. To see a progress bar tqdm has to be installed. kws : dict Additional arguments are passed to the underlying minimization method. Returns ------- result, covar : :class:`scipy.optimize.OptimizeResult`, np.ndarray `result.x` contains the best fit parameters `result.covar` is the covariance matrix for the fit. `result.stderr` is the uncertainties on each of the fit parameters. Notes ----- If the `objective` supplies a `residuals` method then `least_squares` can be used. Otherwise the `nll` method of the `objective` is minimised. Use this method just before a sampling run. If `self.objective.parameters` is a `Parameters` instance, then each of the varying parameters has its value updated by the fit, and each `Parameter` has a `stderr` attribute which represents the uncertainty on the fit parameter. The use of `dual annealing` and `shgo` requires that `scipy >= 1.2.0` be installed. 
""" _varying_parameters = self.objective.varying_parameters() init_pars = np.array(_varying_parameters) _min_kws = {} _min_kws.update(kws) _bounds = bounds_list(self.objective.varying_parameters()) _min_kws["bounds"] = _bounds # setup callback default _min_kws.setdefault("callback", None) cost = self.objective.nll if target == "nlpost": cost = self.objective.nlpost # a decorator for the progress bar updater def _callback_wrapper(callback_func, pbar): def callback(*args, **kwds): pbar.update(1) if callback_func is None: return None else: return callback_func(*args, **kwds) return callback # least_squares Trust Region Reflective by default if method == "least_squares": b = np.array(_bounds) _min_kws["bounds"] = (b[..., 0], b[..., 1]) # least_squares doesn't have a callback _min_kws.pop("callback", None) res = least_squares(self.objective.residuals, init_pars, **_min_kws) # differential_evolution, dual_annealing, shgo require lower and upper # bounds elif method in ["differential_evolution", "dual_annealing", "shgo"]: mini = getattr(sciopt, method) with get_progress_bar(verbose, None) as pbar: _min_kws["callback"] = _callback_wrapper( _min_kws["callback"], pbar) res = mini(cost, **_min_kws) else: # otherwise stick it to minimizer. Default being L-BFGS-B _min_kws["method"] = method _min_kws["bounds"] = _bounds with get_progress_bar(verbose, None) as pbar: _min_kws["callback"] = _callback_wrapper( _min_kws["callback"], pbar) res = minimize(cost, init_pars, **_min_kws) # OptimizeResult.success may not be present (dual annealing) if hasattr(res, "success") and res.success: self.objective.setp(res.x) # Covariance matrix estimation covar = self.objective.covar() errors = np.sqrt(np.diag(covar)) res["covar"] = covar res["stderr"] = errors # check if the parameters are all Parameter instances. flat_params = list(f_unique(flatten(self.objective.parameters))) if np.all([is_parameter(param) for param in flat_params]): # zero out all the old parameter stderrs for param in flat_params: param.stderr = None param.chain = None for i, param in enumerate(_varying_parameters): param.stderr = errors[i] # need to touch up the output to check we leave # parameters as we found them self.objective.setp(res.x) return res
def nDim(self):
    if self.nDimensions is None:
        self.nDimensions = len(
            list(p for p in f_unique(flatten(self.objective.parameters))
                 if p.vary)
        )
    return self.nDimensions
def sample(self, steps, nthin=1, random_state=None, f=None, callback=None,
           verbose=True, pool=0):
    """
    Performs sampling from the objective.

    Parameters
    ----------
    steps : int
        Collect `steps` samples into the chain. The sampler will run a
        total of `steps * nthin` moves.
    nthin : int, optional
        Each chain sample is separated by `nthin` iterations.
    random_state : int or `np.random.RandomState`, optional
        If `random_state` is an int, a new `np.random.RandomState`
        instance is used, seeded with `random_state`.
        If `random_state` is already a `np.random.RandomState` instance,
        then that `np.random.RandomState` instance is used. Specify
        `random_state` for repeatable sampling.
    f : file-like or str
        File to incrementally save chain progress to. Each row in the
        file is a flattened array of size `(nwalkers, ndim)` or
        `(ntemps, nwalkers, ndim)`. There are `steps` rows in the file.
    callback : callable
        callback function to be called at each iteration step
    verbose : bool, optional
        Gives updates on the sampling progress
    pool : int or map-like object, optional
        If `pool` is an `int` then it specifies the number of threads to
        use for parallelization. If `pool == 0`, then all CPUs are used.
        If pool is an object with a map method that follows the same
        calling sequence as the built-in map function, then this pool is
        used for parallelisation.

    Notes
    -----
    Please see :class:`emcee.EnsembleSampler` for its detailed behaviour.

    >>> # we'll burn the first 500 steps
    >>> fitter.sample(500)
    >>> # after you've run those, then discard them by resetting the
    >>> # sampler.
    >>> fitter.sampler.reset()
    >>> # Now collect 40 steps, each step separated by 50 sampler
    >>> # generations.
    >>> fitter.sample(40, nthin=50)

    One can also burn and thin in `Curvefitter.process_chain`.
    """
    self._check_vars_unchanged()

    if self._state is None:
        self.initialise()

    self.__pt_iterations = 0
    if isinstance(self.sampler, PTSampler):
        steps *= nthin

    # for saving progress to file
    def _callback_wrapper(state, h=None):
        if callback is not None:
            callback(state.coords, state.log_prob)

        if h is not None:
            # if you're parallel tempering, then you only
            # want to save every nthin
            if isinstance(self.sampler, PTSampler):
                self.__pt_iterations += 1
                if self.__pt_iterations % nthin:
                    return None

            h.write(' '.join(map(str, state.coords.ravel())))
            h.write('\n')

    # set the random state of the sampler
    # normally one could give this as an argument to the sample method
    # but PTSampler didn't historically accept that...
    if random_state is not None:
        rstate0 = check_random_state(random_state).get_state()
        self._state.random_state = rstate0
        if isinstance(self.sampler, PTSampler):
            self.sampler._random = rstate0

    # remove chains from each of the parameters because they slow down
    # pickling, but only if they are parameter objects.
    flat_params = f_unique(flatten(self.objective.parameters))
    flat_params = [param for param in flat_params if is_parameter(param)]
    # zero out all the old parameter stderrs
    for param in flat_params:
        param.stderr = None
        param.chain = None

    # make sure the checkpoint file exists
    if f is not None:
        with possibly_open_file(f, 'w') as h:
            # write the shape of each step of the chain
            h.write('# ')
            shape = self._state.coords.shape
            h.write(', '.join(map(str, shape)))
            h.write('\n')

    # using a context manager means we kill off zombie pool objects, but
    # it does mean that the pool has to be specified each time.
    with possibly_create_pool(pool) as g, possibly_open_file(f, 'a') as h:
        # if you're not creating more than 1 thread, then don't bother
        # with a pool.
        if pool == 1:
            self.sampler.pool = None
        else:
            self.sampler.pool = g

        # these kwargs are provided to the sampler.sample method
        kwargs = {'iterations': steps, 'thin': nthin}

        # new emcee arguments
        sampler_args = getargspec(self.sampler.sample).args
        if 'progress' in sampler_args and verbose:
            kwargs['progress'] = True
            verbose = False

        if 'thin_by' in sampler_args:
            kwargs['thin_by'] = nthin
            kwargs.pop('thin', 0)

        # ptemcee returns coords, lnprob
        # emcee returns a State object
        if isinstance(self.sampler, PTSampler):
            for result in self.sampler.sample(self._state.coords,
                                              **kwargs):
                self._state = State(result[0],
                                    log_prob=result[1] + result[2],
                                    random_state=self.sampler._random)
                _callback_wrapper(self._state, h=h)
        else:
            for state in self.sampler.sample(self._state, **kwargs):
                self._state = state
                _callback_wrapper(state, h=h)

    self.sampler.pool = None

    # finish off the progress bar
    if verbose:
        sys.stdout.write("\n")

    # sets parameter value and stderr
    return process_chain(self.objective, self.chain)
def fit(self, method='L-BFGS-B', **kws):
    """
    Obtain the maximum log-likelihood estimate (mode) of the objective.
    For a least-squares objective this would correspond to lowest chi2.

    Parameters
    ----------
    method : str
        which method to use for the optimisation. One of:

        - `'least_squares'`: `scipy.optimize.least_squares`.
        - `'L-BFGS-B'`: L-BFGS-B
        - `'differential_evolution'`: differential evolution

        You can also choose many of the minimizers from
        ``scipy.optimize.minimize``.
    kws : dict
        Additional arguments are passed to the underlying minimization
        method.

    Returns
    -------
    result, covar : OptimizeResult, np.ndarray
        `result.x` contains the best fit parameters
        `result.covar` is the covariance matrix for the fit.
        `result.stderr` is the uncertainties on each of the fit
        parameters.

    Notes
    -----
    If the `objective` supplies a `residuals` method then `least_squares`
    can be used. Otherwise the `nll` method of the `objective` is
    minimised. Use this method just before a sampling run.
    If `self.objective.parameters` is a `Parameters` instance, then each
    of the varying parameters has its value updated by the fit, and each
    `Parameter` has a `stderr` attribute which represents the uncertainty
    on the fit parameter.
    """
    _varying_parameters = self.objective.varying_parameters()
    init_pars = np.array(_varying_parameters)

    _min_kws = {}
    _min_kws.update(kws)

    _bounds = bounds_list(self.objective.varying_parameters())
    _min_kws['bounds'] = _bounds

    # least_squares Trust Region Reflective by default
    if method == 'least_squares':
        b = np.array(_bounds)
        _min_kws['bounds'] = (b[..., 0], b[..., 1])
        res = least_squares(self.objective.residuals, init_pars,
                            **_min_kws)
    # differential_evolution requires lower and upper bounds
    elif method == 'differential_evolution':
        res = differential_evolution(self.objective.nll, **_min_kws)
    else:
        # otherwise stick it to minimizer. Default being L-BFGS-B
        _min_kws['method'] = method
        _min_kws['bounds'] = _bounds
        res = minimize(self.objective.nll, init_pars, **_min_kws)

    if res.success:
        self.objective.setp(res.x)

        # Covariance matrix estimation
        covar = self.objective.covar()
        errors = np.sqrt(np.diag(covar))
        res['covar'] = covar
        res['stderr'] = errors

        # check if the parameters are all Parameter instances.
        flat_params = list(f_unique(flatten(self.objective.parameters)))
        if np.all([is_parameter(param) for param in flat_params]):
            # zero out all the old parameter stderrs
            for param in flat_params:
                param.stderr = None
                param.chain = None

            for i, param in enumerate(_varying_parameters):
                param.stderr = errors[i]

        # need to touch up the output to check we leave
        # parameters as we found them
        self.objective.setp(res.x)

    return res
def sample(
    self,
    steps,
    nthin=1,
    random_state=None,
    f=None,
    callback=None,
    verbose=True,
    pool=-1,
):
    """
    Performs sampling from the objective.

    Parameters
    ----------
    steps : int
        Collect `steps` samples into the chain. The sampler will run a
        total of `steps * nthin` moves.
    nthin : int, optional
        Each chain sample is separated by `nthin` iterations.
    random_state : {int, `np.random.RandomState`, `np.random.Generator`}
        If `random_state` is not specified the `~np.random.RandomState`
        singleton is used.
        If `random_state` is an int, a new ``RandomState`` instance is
        used, seeded with `random_state`.
        If `random_state` is already a ``RandomState`` or a ``Generator``
        instance, then that object is used.
        Specify `random_state` for repeatable minimizations.
    f : file-like or str
        File to incrementally save chain progress to. Each row in the
        file is a flattened array of size `(nwalkers, ndim)` or
        `(ntemps, nwalkers, ndim)`. There are `steps` rows in the file.
    callback : callable
        callback function to be called at each iteration step. Has the
        signature `callback(coords, logprob)`.
    verbose : bool, optional
        Gives updates on the sampling progress
    pool : int or map-like object, optional
        If `pool` is an `int` then it specifies the number of threads to
        use for parallelization. If `pool == -1`, then all CPUs are used.
        If pool is a map-like callable that follows the same calling
        sequence as the built-in map function, then this pool is used for
        parallelisation.

    Notes
    -----
    Please see :class:`emcee.EnsembleSampler` for its detailed behaviour.

    >>> # we'll burn the first 500 steps
    >>> fitter.sample(500)
    >>> # after you've run those, then discard them by resetting the
    >>> # sampler.
    >>> fitter.sampler.reset()
    >>> # Now collect 40 steps, each step separated by 50 sampler
    >>> # generations.
    >>> fitter.sample(40, nthin=50)

    One can also burn and thin in `Curvefitter.process_chain`.
    """
    self._check_vars_unchanged()

    # setup a random number generator
    rng = check_random_state(random_state)

    if self._state is None:
        self.initialise(random_state=rng)

    # for saving progress to file
    def _callback_wrapper(state, h=None):
        if callback is not None:
            callback(state.coords, state.log_prob)

        if h is not None:
            h.write(" ".join(map(str, state.coords.ravel())))
            h.write("\n")

    # remove chains from each of the parameters because they slow down
    # pickling, but only if they are parameter objects.
    flat_params = f_unique(flatten(self.objective.parameters))
    flat_params = [param for param in flat_params if is_parameter(param)]
    # zero out all the old parameter stderrs
    for param in flat_params:
        param.stderr = None
        param.chain = None

    # make sure the checkpoint file exists
    if f is not None:
        with possibly_open_file(f, "w") as h:
            # write the shape of each step of the chain
            h.write("# ")
            shape = self._state.coords.shape
            h.write(", ".join(map(str, shape)))
            h.write("\n")

    # set the random state of the sampler
    # normally one could give this as an argument to the sample method
    # but PTSampler didn't historically accept that...
    if isinstance(rng, np.random.RandomState):
        rstate0 = rng.get_state()
        self._state.random_state = rstate0
        self.sampler.random_state = rstate0

    # using a context manager means we kill off zombie pool objects, but
    # it does mean that the pool has to be specified each time.
    with MapWrapper(pool) as g, possibly_open_file(f, "a") as h:
        # these kwargs are provided to the sampler.sample method
        kwargs = {"iterations": steps, "thin": nthin}

        # if you're not creating more than 1 thread, then don't bother
        # with a pool.
        if isinstance(self.sampler, emcee.EnsembleSampler):
            if pool == 1:
                self.sampler.pool = None
            else:
                self.sampler.pool = g
        else:
            kwargs["mapper"] = g

        # new emcee arguments
        sampler_args = getargspec(self.sampler.sample).args
        if "progress" in sampler_args and verbose:
            kwargs["progress"] = True
            verbose = False

        if "thin_by" in sampler_args:
            kwargs["thin_by"] = nthin
            kwargs.pop("thin", 0)

        # perform the sampling
        for state in self.sampler.sample(self._state, **kwargs):
            self._state = state
            _callback_wrapper(state, h=h)

    if isinstance(self.sampler, emcee.EnsembleSampler):
        self.sampler.pool = None

    # sets parameter value and stderr
    return process_chain(self.objective, self.chain)
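# A hedged continuation of the sketch after fit() above: burn in, reset the
# sampler, then collect a thinned production chain. pool=1 keeps the sampling
# serial; the walkers are initialised automatically on the first call to
# sample().
fitter.sample(200)                             # burn-in steps
fitter.sampler.reset()                         # discard the burn-in
results = fitter.sample(30, nthin=20, pool=1)  # 30 saved steps, thinned by 20

print(fitter.chain.shape)                      # (30, nwalkers, nvary)
for r in results:
    # each MCMCResult carries the median, stderr and per-parameter chain
    print(r.name, r.median, r.stderr)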
def logp(self):
    # logp for all the parameters
    return np.sum([param.logp() for param in f_unique(flatten(self.data))
                   if param.vary])
def process_chain(objective, chain, nburn=0, nthin=1, flatchain=False):
    """
    Process the chain produced by a sampler for a given Objective

    Parameters
    ----------
    objective : refnx.analysis.Objective
        The Objective function that the Posterior was sampled for
    chain : array
        The MCMC chain
    nburn : int, optional
        discard this many steps from the start of the chain
    nthin : int, optional
        only accept every `nthin` samples from the chain
    flatchain : bool, optional
        collapse the walkers down into a single dimension.

    Returns
    -------
    [(param, stderr, chain)] : list
        List of (param, stderr, chain) tuples.
        If `isinstance(objective.parameters, Parameters)` then `param` is
        a `Parameter` instance. `param.value`, `param.stderr` and
        `param.chain` will contain the median, stderr and chain samples,
        respectively. Otherwise `param` will be a float representing the
        median of the chain samples.
        `stderr` is the half width of the [15.87, 84.13] spread (similar
        to standard deviation) and `chain` is an array containing the
        MCMC samples for that parameter.

    Notes
    -----
    The chain should have the shape `(iterations, nwalkers, nvary)` or
    `(iterations, ntemps, nwalkers, nvary)` if parallel tempering was
    employed.
    The burned and thinned chain is created via: `chain[nburn::nthin]`.
    Note, if parallel tempering is employed, then only the lowest
    temperature of the parallel tempering chain is processed and returned
    as it corresponds to the (lowest energy) target distribution.
    If `flatchain is True` then the burned/thinned chain is reshaped and
    `arr.reshape(-1, nvary)` is returned.
    This function has the effect of setting the parameter stderr's.
    """
    chain = chain[nburn::nthin]
    shape = chain.shape
    nvary = shape[-1]

    # nwalkers = shape[1]
    if len(shape) == 4:
        ntemps = shape[1]
    elif len(shape) == 3:
        ntemps = -1

    if ntemps != -1:
        # PTSampler, we require the target distribution in the first row.
        chain = chain[:, 0]

    _flatchain = chain.reshape((-1, nvary))
    if flatchain:
        chain = _flatchain

    flat_params = list(f_unique(flatten(objective.parameters)))
    varying_parameters = objective.varying_parameters()

    # set the stderr of each of the Parameters
    result_list = []
    if np.all([is_parameter(param) for param in flat_params]):
        # zero out all the old parameter stderrs
        for param in flat_params:
            param.stderr = None
            param.chain = None

        # do the error calcn for the varying parameters and set the chain
        quantiles = np.percentile(_flatchain, [15.87, 50, 84.13], axis=0)
        for i, param in enumerate(varying_parameters):
            std_l, median, std_u = quantiles[:, i]
            param.value = median
            param.stderr = 0.5 * (std_u - std_l)

            # copy in the chain
            param.chain = np.copy(chain[..., i])
            res = MCMCResult(
                name=param.name,
                param=param,
                median=param.value,
                stderr=param.stderr,
                chain=param.chain,
            )
            result_list.append(res)

        fitted_values = np.array(varying_parameters)

        # give each constrained param a chain (to be reshaped later)
        constrained_params = [
            param for param in flat_params if param.constraint is not None
        ]

        for constrain_param in constrained_params:
            constrain_param.chain = np.empty(chain.shape[:-1], float)

        # now iterate through the varying parameters, set the values,
        # thereby setting the constraint value
        if len(constrained_params):
            for index in np.ndindex(chain.shape[:-1]):
                # iterate over parameter vectors
                pvals = chain[index]
                objective.setp(pvals)

                for constrain_param in constrained_params:
                    constrain_param.chain[index] = constrain_param.value

            for constrain_param in constrained_params:
                quantiles = np.percentile(constrain_param.chain,
                                          [15.87, 50, 84.13])

                std_l, median, std_u = quantiles
                constrain_param.value = median
                constrain_param.stderr = 0.5 * (std_u - std_l)

        # now reset fitted parameter values (they would've been changed by
        # constraints calculations)
        objective.setp(fitted_values)

    # the parameter set are not Parameter objects, an array was probably
    # being used with BaseObjective.
    else:
        for i in range(nvary):
            c = np.copy(chain[..., i])
            median, stderr = uncertainty_from_chain(c)
            res = MCMCResult(name="", param=median, median=median,
                             stderr=stderr, chain=c)
            result_list.append(res)

    return result_list
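# process_chain can also be called directly to re-burn/re-thin an existing
# chain without re-sampling, continuing the hypothetical fitter/objective from
# the sketches above: drop the first 10 saved steps, keep every 2nd of the
# remainder, then read the refreshed medians/stderrs off the results.
results = process_chain(objective, fitter.chain, nburn=10, nthin=2)

first = results[0]
print(first.param.value, first.param.stderr)  # median and half the [15.87, 84.13] spread
print(first.chain.shape)                      # burned/thinned samples for that parameter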
def lnprob(self):
    # lnprob for all the parameters
    return np.sum([param.lnprob() for param in f_unique(flatten(self.data))
                   if param.vary])