def initialize_population(self):
    """Create an initial population from the prior distribution."""
    population = []
    var_info = OrderedDict()
    if self.start is None:
        init_rnd = sample_prior_predictive(
            self.draws,
            var_names=[v.name for v in self.model.unobserved_RVs],
            model=self.model,
        )
    else:
        init_rnd = self.start

    init = self.model.initial_point

    for v in self.variables:
        var_info[v.name] = (init[v.name].shape, init[v.name].size)

    for i in range(self.draws):
        point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model)
        population.append(DictToArrayBijection.map(point).data)

    self.posterior = np.array(floatX(population))
    self.var_info = var_info
def initialize_population(self):
    """Create an initial population from the prior distribution."""
    population = []
    var_info = OrderedDict()
    init_rnd = sample_prior_predictive(
        self.draws,
        var_names=[v.name for v in self.model.unobserved_RVs],
        model=self.model,
    )
    init = self.model.test_point

    for v in self.variables:
        var_info[v.name] = (init[v.name].shape, init[v.name].size)

    for i in range(self.draws):
        point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model)
        population.append(self.model.dict_to_array(point))

    self.nf_samples = np.array(floatX(population))
    self.live_points = np.array(floatX(population))
    self.var_info = var_info
    self.posterior = np.empty((0, np.shape(self.nf_samples)[1]))
def initialize_population(self):
    """Create an initial population from the prior distribution."""
    population = []
    if self.init_samples is None:
        init_rnd = sample_prior_predictive(
            self.N,
            var_names=[v.name for v in self.model.unobserved_RVs],
            model=self.model,
        )
        for i in range(self.N):
            point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model)
            population.append(self.model.dict_to_array(point))
        self.prior_samples = np.array(floatX(population))
    elif self.init_samples is not None:
        self.prior_samples = np.copy(self.init_samples)

    self.samples = np.copy(self.prior_samples)
    self.nf_samples = np.copy(self.samples)
    self.get_posterior_logp()
    self.get_prior_logp()
    self.log_weight = self.posterior_logp - self.prior_logp
    self.log_evidence = logsumexp(self.log_weight) - np.log(len(self.log_weight))
    self.evidence = np.exp(self.log_evidence)
    self.log_weight = self.log_weight - self.log_evidence
    self.regularize_weights()

    # Same as in fitnf, but with prior ~ q.
    self.log_weight_pq_num = self.posterior_logp + 2 * self.prior_logp
    self.log_weight_pq_den = 3 * self.prior_logp
    self.log_evidence_pq = logsumexp(self.log_weight_pq_num) - logsumexp(self.log_weight_pq_den)
    self.evidence_pq = np.exp(self.log_evidence_pq)
    self.log_weight_pq = self.posterior_logp - self.prior_logp - self.log_evidence_pq
    # Not actually used yet, I think.
    self.pq_bw_loss = np.log(
        (np.exp(self.posterior_logp) - np.exp(self.log_evidence_pq + self.prior_logp)) ** 2
    )
    self.regularize_weights_pq()

    # Mean loss (p - q * Z_pq)^2 / N, for diagnostic purposes.
    self.log_mean_loss = np.log(
        np.mean((np.exp(self.posterior_logp) - np.exp(self.prior_logp + self.log_evidence_pq)) ** 2)
    )

    self.init_weights_cleanup(lambda x: self.prior_logp(x), lambda x: self.prior_dlogp(x))
    self.q_ess = self.calculate_ess(self.log_weight)
    self.total_ess = self.calculate_ess(self.sinf_logw)
    self.all_logq = np.array([])
    self.nf_models = []
    self.nf_models_uw = []
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, potential=None,
             integrator="leapfrog", dtype=None, **theano_kwargs):
    """Set up Hamiltonian samplers with common structures.

    Parameters
    ----------
    vars : list of theano variables
    scaling : array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays are interpreted as the matrix diagonal.
    step_scale : float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov : bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat it as a
        precision matrix/vector
    model : pymc3 Model instance
    blocked : bool, default=True
    potential : Potential, optional
        An object that represents the Hamiltonian with `velocity`, `energy`, and
        `random` methods.
    **theano_kwargs : passed to theano functions
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    super(BaseHMC, self).__init__(vars, blocked=blocked, model=model,
                                  dtype=dtype, **theano_kwargs)

    size = self._logp_dlogp_func.size

    if scaling is None and potential is None:
        mean = floatX(np.zeros(size))
        var = floatX(np.ones(size))
        potential = QuadPotentialDiagAdapt(size, mean, var, 10)

    if isinstance(scaling, dict):
        point = Point(scaling, model=model)
        scaling = guess_scaling(point, model=model, vars=vars)

    if scaling is not None and potential is not None:
        raise ValueError("Can not specify both potential and scaling.")

    self.step_size = step_scale / (size ** 0.25)

    if potential is not None:
        self.potential = potential
    else:
        self.potential = quad_potential(scaling, is_cov)

    self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func)
def __call__(self, q0_dict: dict) -> dict:
    """Returns proposed sample given the current sample
    in dictionary form (q0_dict)."""

    # Logging is reduced to avoid extensive console output
    # during multiple recursive calls of subsample()
    _log = logging.getLogger("pymc3")
    _log.setLevel(logging.ERROR)

    with self.model_below:
        # Check if the tuning flag has been set to False
        # in which case tuning is stopped. The flag is set
        # to False (by MLDA's astep) when the burn-in
        # iterations of the highest-level MLDA sampler run out.
        # The change propagates to all levels.
        if self.tune:
            # Subsample in tuning mode
            self.trace = subsample(
                draws=0,
                step=self.step_method_below,
                start=q0_dict,
                trace=self.trace,
                tune=self.subsampling_rate,
            )
        else:
            # Subsample in normal mode without tuning
            # If DEMetropolisZMLDA is the base sampler a flag is raised to
            # make sure that history is edited after tuning ends
            if self.tuning_end_trigger:
                if isinstance(self.step_method_below, DEMetropolisZMLDA):
                    self.step_method_below.tuning_end_trigger = True
                self.tuning_end_trigger = False

            self.trace = subsample(
                draws=self.subsampling_rate,
                step=self.step_method_below,
                start=q0_dict,
                trace=self.trace,
            )

    # set logging back to normal
    _log.setLevel(logging.NOTSET)

    # return sample with index self.subchain_selection from the generated
    # sequence of length self.subsampling_rate. The index is set within
    # MLDA's astep() function
    new_point = self.trace.point(-self.subsampling_rate + self.subchain_selection)
    new_point = Point(new_point, model=self.model_below, filter_model_vars=True)

    return new_point
def find_hessian_diag(point, vars=None, model=None):
    """
    Returns the diagonal of the Hessian of logp at the point passed.

    Parameters
    ----------
    model: Model (optional if in `with` context)
    point: dict
    vars: list
        Variables for which the Hessian is to be calculated.
    """
    model = modelcontext(model)
    H = model.fastfn(hessian_diag(model.logpt, vars))
    return H(Point(point, model=model))
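A minimal usage sketch, not part of the original source: calling the find_hessian_diag helper above at a model's test point, assuming a pymc3 3.x-era model that exposes `test_point`.

# Hypothetical example (assumes the find_hessian_diag defined above is in scope).
import numpy as np
import pymc3 as pm

with pm.Model() as toy_model:
    mu = pm.Normal("mu", mu=0.0, sigma=1.0)
    pm.Normal("obs", mu=mu, sigma=1.0, observed=np.array([0.1, -0.3, 0.2]))

    # Diagonal of the Hessian of logp, evaluated at the model's test point.
    hess_diag = find_hessian_diag(toy_model.test_point)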
def _iter_sample(draws, step, start=None, trace=None, chain=0,
                 tune=None, model=None, random_seed=-1):
    """
    Modified from :func:`pymc3.sampling._iter_sample`

    tune: int
        Adaptive step-size scaling is stopped after this many chain samples.
    """
    model = modelcontext(model)
    draws = int(draws)

    if draws < 1:
        raise ValueError('Argument `draws` should be above 0.')

    if start is None:
        start = {}

    if random_seed != -1:
        seed(random_seed)

    try:
        step = CompoundStep(step)
    except TypeError:
        pass

    point = Point(start, model=model)
    step.chain_index = chain

    trace.setup(draws, chain)
    for i in range(draws):
        if i == tune:
            step = stop_tuning(step)

        logger.debug('Step: Chain_%i step_%i' % (chain, i))
        point, out_list = step.step(point)
        trace.write(out_list, i)
        yield trace
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, use_single_leapfrog=False, **theano_kwargs):
    """Superclass to implement Hamiltonian/hybrid Monte Carlo.

    Parameters
    ----------
    vars : list of theano variables
    scaling : array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays are interpreted as the matrix diagonal.
    step_scale : float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov : bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat it as a
        precision matrix/vector
    state
        State object
    model : pymc3 Model instance. default=Context model
    blocked : bool, default=True
    use_single_leapfrog : bool, default=False
        If True, leapfrog steps take a single step at a time.
    **theano_kwargs : passed to theano functions
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    if scaling is None:
        scaling = model.test_point

    if isinstance(scaling, dict):
        scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars)

    n = scaling.shape[0]

    self.step_size = step_scale / (n ** 0.25)
    self.potential = quad_potential(scaling, is_cov, as_cov=False)

    shared = make_shared_replacements(vars, model)
    if theano_kwargs is None:
        theano_kwargs = {}

    self.H, self.compute_energy, self.leapfrog, self._vars = get_theano_hamiltonian_functions(
        vars, shared, model.logpt, self.potential, use_single_leapfrog, **theano_kwargs)

    super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
def __init__(self, model=None):
    # Get the model
    self.model = pm.modelcontext(model)

    # Get the variables
    self.varnames = get_default_varnames(self.model.unobserved_RVs, False)

    # Get the starting point
    self.start = Point(self.model.test_point, model=self.model)
    self.ndim = len(self.start)
    self.mean = None
    self.cov = None

    # Compile the log probability function
    self.vars = inputvars(self.model.cont_vars)
    self.bij = DictToArrayBijection(ArrayOrdering(self.vars), self.start)
    self.func = get_theano_function_for_var(self.model.logpt, model=self.model)
def test_missing_data(self):
    # Originally from a case described in #3122
    X = np.random.binomial(1, 0.5, 10)
    X[0] = -1  # masked a single value
    X = np.ma.masked_values(X, value=-1)
    with pm.Model() as m:
        x1 = pm.Uniform("x1", 0.0, 1.0)
        x2 = pm.Bernoulli("x2", x1, observed=X)

    gf = m.logp_dlogp_function()
    gf._extra_are_set = True

    assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type

    pnt = m.test_point.copy()
    del pnt["x2_missing"]

    res = [gf(DictToArrayBijection.map(Point(pnt, model=m))) for i in range(5)]

    assert reduce(lambda x, y: np.array_equal(x, y) and y, res) is not False
def fixed_hessian(point, vars=None, model=None):
    """
    Returns a fixed Hessian for any chain location.

    Parameters
    ----------
    model: Model (optional if in `with` context)
    point: dict
    vars: list
        Variables for which the Hessian is to be calculated.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    point = Point(point, model=model)

    rval = np.ones(DictToArrayBijection.map(point).size) / 10
    return rval
def __init__(self, vars=None, model=None, point=None):
    self.model = pm.modelcontext(model)

    # Work out the full starting coordinates
    if point is None:
        point = self.model.test_point
    else:
        pm.util.update_start_vals(point, self.model.test_point, self.model)

    # Fit all the parameters by default
    if vars is None:
        vars = self.model.cont_vars
    self.vars = inputvars(vars)
    allinmodel(self.vars, self.model)

    # Work out the relevant bijection map
    point = Point(point, model=self.model)
    self.bijection = DictToArrayBijection(ArrayOrdering(self.vars), point)

    # Pre-compile the theano model and gradient
    nlp = -self.model.logpt
    grad = theano.grad(nlp, self.vars, disconnected_inputs="ignore")
    self.func = get_theano_function_for_var([nlp] + grad, model=self.model)
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, potential=None, dtype=None, Emax=1000,
             target_accept=0.8, gamma=0.05, k=0.75, t0=10,
             adapt_step_size=True, step_rand=None, **aesara_kwargs):
    """Set up Hamiltonian samplers with common structures.

    Parameters
    ----------
    vars: list of aesara variables
    scaling: array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays are interpreted as the matrix diagonal.
    step_scale: float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov: bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat it as a
        precision matrix/vector
    model: pymc3 Model instance
    blocked: bool, default=True
    potential: Potential, optional
        An object that represents the Hamiltonian with `velocity`, `energy`, and
        `random` methods.
    **aesara_kwargs: passed to aesara functions
    """
    self._model = modelcontext(model)

    if vars is None:
        vars = self._model.cont_vars
    vars = inputvars(vars)

    super().__init__(vars, blocked=blocked, model=model, dtype=dtype, **aesara_kwargs)

    self.adapt_step_size = adapt_step_size
    self.Emax = Emax
    self.iter_count = 0

    size = self._logp_dlogp_func.size

    self.step_size = step_scale / (size ** 0.25)
    self.step_adapt = step_sizes.DualAverageAdaptation(
        self.step_size, target_accept, gamma, k, t0
    )
    self.target_accept = target_accept
    self.tune = True

    if scaling is None and potential is None:
        mean = floatX(np.zeros(size))
        var = floatX(np.ones(size))
        potential = QuadPotentialDiagAdapt(size, mean, var, 10)

    if isinstance(scaling, dict):
        point = Point(scaling, model=model)
        scaling = guess_scaling(point, model=model, vars=vars)

    if scaling is not None and potential is not None:
        raise ValueError("Can not specify both potential and scaling.")

    if potential is not None:
        self.potential = potential
    else:
        self.potential = quad_potential(scaling, is_cov)

    self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func)

    self._step_rand = step_rand
    self._warnings = []
    self._samples_after_tune = 0
    self._num_divs_sample = 0
def find_MAP(start=None, vars=None, method="L-BFGS-B", return_raw=False,
             include_transformed=True, progressbar=True, maxeval=5000,
             model=None, *args, **kwargs):
    """
    Finds the local maximum a posteriori point given a model.

    find_MAP should not be used to initialize the NUTS sampler. Simply call
    pymc3.sample() and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.test_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete variables are
        specified in `vars`, then `Powell` which will perform better). For instructions
        on use of a callable, refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition to original
        variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used find_MAP() to initialize the NUTS sampler, but this is not
    an effective way of choosing starting values for sampling. As a result, we have
    greatly enhanced the initialization of NUTS and wrapped it inside pymc3.sample()
    and you should thus avoid this method.
    """
    model = modelcontext(model)

    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)
    check_start_vals(start, model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)
    logp_func = bij.mapf(model.fastlogp_nojac)
    x0 = bij.map(start)

    try:
        dlogp_func = bij.mapf(model.fastdlogp_nojac(vars))
        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available."
            + "(E.g. vars contains discrete variables). MAP "
            + "estimates may not be accurate for the default "
            + "parameters. Defaulting to non-gradient minimization "
            + "'Powell'."
        )
        method = "Powell"

    if "fmin" in kwargs:
        fmin = kwargs.pop("fmin")
        warnings.warn(
            "In future versions, set the optimization algorithm with a string. "
            'For example, use `method="L-BFGS-B"` instead of '
            '`fmin=sp.optimize.fmin_l_bfgs_b"`.'
        )

        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        # Check to see if minimization function actually uses the gradient
        if "fprime" in getargspec(fmin).args:

            def grad_logp(point):
                return nan_to_num(-dlogp_func(point))

            opt_result = fmin(cost_func, x0, fprime=grad_logp, *args, **kwargs)
        else:
            # Check to see if minimization function uses a starting value
            if "x0" in getargspec(fmin).args:
                opt_result = fmin(cost_func, x0, *args, **kwargs)
            else:
                opt_result = fmin(cost_func, *args, **kwargs)

        if isinstance(opt_result, tuple):
            mx0 = opt_result[0]
        else:
            mx0 = opt_result
    else:
        # remove 'if' part, keep just this 'else' block after version change
        if compute_gradient:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func, dlogp_func)
        else:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        try:
            opt_result = minimize(cost_func, x0, method=method,
                                  jac=compute_gradient, *args, **kwargs)
            mx0 = opt_result["x"]  # r -> opt_result
        except (KeyboardInterrupt, StopIteration) as e:
            mx0, opt_result = cost_func.previous_x, None
            if isinstance(e, StopIteration):
                pm._log.info(e)
        finally:
            last_v = cost_func.n_eval
            if progressbar:
                assert isinstance(cost_func.progress, ProgressBar)
                cost_func.progress.total = last_v
                cost_func.progress.update(last_v)
                print()

    vars = get_default_varnames(model.unobserved_RVs, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, use_single_leapfrog=False,
             potential=None, integrator="leapfrog", **theano_kwargs):
    """Superclass to implement Hamiltonian/hybrid Monte Carlo.

    Parameters
    ----------
    vars : list of theano variables
    scaling : array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays are interpreted as the matrix diagonal.
    step_scale : float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov : bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat it as a
        precision matrix/vector
    model : pymc3 Model instance. default=Context model
    blocked : bool, default=True
    use_single_leapfrog : bool, default=False
        If True, leapfrog steps take a single step at a time.
    potential : Potential, optional
        An object that represents the Hamiltonian with `velocity`, `energy`, and
        `random` methods.
    **theano_kwargs : passed to theano functions
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    if scaling is None and potential is None:
        size = sum(np.prod(var.dshape, dtype=int) for var in vars)
        mean = floatX(np.zeros(size))
        var = floatX(np.ones(size))
        potential = QuadPotentialDiagAdapt(size, mean, var, 10)

    if isinstance(scaling, dict):
        point = Point(scaling, model=model)
        scaling = guess_scaling(point, model=model, vars=vars)

    if scaling is not None and potential is not None:
        raise ValueError("Can not specify both potential and scaling.")

    self.step_size = step_scale / (model.ndim ** 0.25)

    if potential is not None:
        self.potential = potential
    else:
        self.potential = quad_potential(scaling, is_cov)

    shared = make_shared_replacements(vars, model)
    if theano_kwargs is None:
        theano_kwargs = {}

    self.H, self.compute_energy, self.compute_velocity, self.leapfrog, self.dlogp = \
        get_theano_hamiltonian_functions(
            vars, shared, model.logpt, self.potential,
            use_single_leapfrog, integrator, **theano_kwargs)

    super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
def __init__(self, vars=None, out_vars=None, covariance=None, scale=1.,
             n_chains=100, tune=True, tune_interval=100, model=None,
             check_bound=True, likelihood_name='like', backend='csv',
             proposal_name='MultivariateNormal', **kwargs):

    model = modelcontext(model)

    if vars is None:
        vars = model.vars
    vars = inputvars(vars)

    if out_vars is None:
        out_vars = model.unobserved_RVs
    out_varnames = [out_var.name for out_var in out_vars]

    self.scaling = utility.scalar2floatX(num.atleast_1d(scale))

    self.tune = tune
    self.check_bound = check_bound
    self.tune_interval = tune_interval
    self.steps_until_tune = tune_interval

    self.stage_sample = 0
    self.cumulative_samples = 0
    self.accepted = 0

    self.beta = 1.
    self.stage = 0
    self.chain_index = 0

    # needed to use the same parallel implementation function as for SMC
    self.resampling_indexes = num.arange(n_chains)
    self.n_chains = n_chains

    self.likelihood_name = likelihood_name
    self._llk_index = out_varnames.index(likelihood_name)
    self.backend = backend
    self.discrete = num.concatenate(
        [[v.dtype in discrete_types] * (v.dsize or 1) for v in vars])
    self.any_discrete = self.discrete.any()
    self.all_discrete = self.discrete.all()

    # create initial population
    self.population = []
    self.array_population = num.zeros(n_chains)
    logger.info('Creating initial population for {}'
                ' chains ...'.format(self.n_chains))
    for i in range(self.n_chains):
        self.population.append(
            Point({v.name: v.random() for v in vars}, model=model))

    self.population[0] = model.test_point

    shared = make_shared_replacements(vars, model)
    self.logp_forw = logp_forw(out_vars, vars, shared)
    self.check_bnd = logp_forw([model.varlogpt], vars, shared)

    super(Metropolis, self).__init__(vars, out_vars, shared)

    # init proposal
    if covariance is None and proposal_name in multivariate_proposals:
        t0 = time()
        self.covariance = init_proposal_covariance(
            bij=self.bij, vars=vars, model=model, pop_size=1000)
        t1 = time()
        logger.info('Time for proposal covariance init: %f' % (t1 - t0))
        scale = self.covariance
    elif covariance is None:
        scale = num.ones(sum(v.dsize for v in vars))
    else:
        scale = covariance

    self.proposal_name = proposal_name
    self.proposal_dist = choose_proposal(self.proposal_name, scale=scale)

    self.proposal_samples_array = self.proposal_dist(n_chains)

    self.chain_previous_lpoint = [[]] * self.n_chains
    self._tps = None
def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None,
                 model=None, random_seed=-1, overwrite=True,
                 update_proposal=False, keep_last=False):
    """
    Modified from :func:`pymc3.sampling._iter_sample`

    tune: int
        Adaptive step-size scaling is stopped after this many chain samples.
    """
    model = modelcontext(model)
    draws = int(draws)

    if draws < 1:
        raise ValueError('Argument `draws` should be above 0.')

    if start is None:
        start = {}

    if random_seed != -1:
        seed(random_seed)

    try:
        step = CompoundStep(step)
    except TypeError:
        pass

    point = Point(start, model=model)
    step.chain_index = chain

    trace.setup(draws, chain, overwrite=overwrite)
    for i in range(draws):
        if i == tune:
            step = stop_tuning(step)

        logger.debug('Step: Chain_%i step_%i' % (chain, i))
        point, out_list = step.step(point)

        try:
            trace.buffer_write(out_list, step.cumulative_samples)
        except BufferError:  # buffer full
            last_sample = deepcopy(trace.buffer[-1])
            if update_proposal:
                # only valid for PT for now
                if step.proposal_name in multivariate_proposals:
                    cov = trace.get_sample_covariance(step)
                    if cov is not None:
                        if not isinstance(trace, MemoryChain):
                            filename = '%s/proposal_cov_chain_%i_%i.%s' % (
                                trace.dir_path, trace.chain,
                                trace.cov_counter, 'png')

                            from matplotlib import pyplot as plt
                            fig, axs = plt.subplots(1, 1)
                            im = axs.imshow(cov, aspect='auto')
                            plt.colorbar(im)
                            fig.savefig(filename, dpi=150)
                            plt.close(fig)

                        step.proposal_dist = choose_proposal(
                            step.proposal_name, scale=cov)

            trace.record_buffer()

            if keep_last:
                # put last sample back
                trace.buffer_write(*last_sample)

        yield trace
def optimize(start=None, vars=None, model=None, return_info=False,
             verbose=True, **kwargs):
    """Maximize the log prob of a PyMC3 model using scipy

    All extra arguments are passed directly to the ``scipy.optimize.minimize``
    function.

    Args:
        start: The PyMC3 coordinate dictionary of the starting position
        vars: The variables to optimize
        model: The PyMC3 model
        return_info: Return both the coordinate dictionary and the result of
            ``scipy.optimize.minimize``
        verbose: Print the success flag and log probability to the screen

    """
    from scipy.optimize import minimize

    model = pm.modelcontext(model)

    # Work out the full starting coordinates
    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)

    # Fit all the parameters by default
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    allinmodel(vars, model)

    # Work out the relevant bijection map
    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)

    # Pre-compile the theano model and gradient
    nlp = -model.logpt
    grad = theano.grad(nlp, vars, disconnected_inputs="ignore")
    func = get_theano_function_for_var([nlp] + grad, model=model)

    if verbose:
        names = [
            get_untransformed_name(v.name)
            if is_transformed_name(v.name) else v.name
            for v in vars
        ]
        sys.stderr.write("optimizing logp for variables: [{0}]\n".format(
            ", ".join(names)))
        bar = tqdm.tqdm()

    # This returns the objective function and its derivatives
    def objective(vec):
        res = func(*get_args_for_theano_function(bij.rmap(vec), model=model))
        d = dict(zip((v.name for v in vars), res[1:]))
        g = bij.map(d)
        if verbose:
            bar.set_postfix(logp="{0:e}".format(-res[0]))
            bar.update()
        return res[0], g

    # Optimize using scipy.optimize
    x0 = bij.map(start)
    initial = objective(x0)[0]
    kwargs["jac"] = True
    info = minimize(objective, x0, **kwargs)

    # Only accept the output if it is better than it was
    x = info.x if (np.isfinite(info.fun) and info.fun < initial) else x0

    # Coerce the output into the right format
    vars = get_default_varnames(model.unobserved_RVs, True)
    point = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(bij.rmap(x)))
    }

    if verbose:
        bar.close()
        sys.stderr.write("message: {0}\n".format(info.message))
        sys.stderr.write("logp: {0} -> {1}\n".format(-initial, -info.fun))
        if not np.isfinite(info.fun):
            logger.warning("final logp not finite, returning initial point")
            logger.warning("this suggests that something is wrong with the model")
            logger.debug("{0}".format(info))

    if return_info:
        return point, info
    return point
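A minimal usage sketch, not part of the original source: calling the optimize() helper above on a toy pymc3 3.x model, optimizing only one variable and keeping the scipy result for inspection.

# Hypothetical example (assumes the optimize defined above is in scope).
import numpy as np
import pymc3 as pm

with pm.Model() as toy_model:
    log_sigma = pm.Normal("log_sigma", mu=0.0, sigma=1.0)
    pm.Normal("obs", mu=0.0, sigma=pm.math.exp(log_sigma),
              observed=np.random.randn(50))

    # Start from the test point, refine only log_sigma, and return the
    # scipy.optimize.minimize result alongside the coordinate dict.
    map_soln, info = optimize(vars=[log_sigma], return_info=True)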
def find_MAP(start=None, vars=None, method="L-BFGS-B", return_raw=False,
             include_transformed=True, progressbar=True, maxeval=5000,
             model=None, *args, **kwargs):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc3.sample()`` and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete variables are
        specified in `vars`, then `Powell` which will perform better). For instructions
        on use of a callable, refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition to original
        variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler, but this is not
    an effective way of choosing starting values for sampling. As a result, we have
    greatly enhanced the initialization of NUTS and wrapped it inside ``pymc3.sample()``
    and you should thus avoid this method.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    start = copy.deepcopy(start)
    if start is None:
        start = model.initial_point
    else:
        model.update_start_vals(start, model.initial_point)
    model.check_start_vals(start)

    start = Point(start, model=model)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(
            RaveledVars(x, x0.point_map_info))

    try:
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
                RaveledVars(x, x0.point_map_info))

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available."
            + "(E.g. vars contains discrete variables). MAP "
            + "estimates may not be accurate for the default "
            + "parameters. Defaulting to non-gradient minimization "
            + "'Powell'."
        )
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func, dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(cost_func, x0.data, method=method,
                              jac=compute_gradient, *args, **kwargs)
        mx0 = opt_result["x"]  # r -> opt_result
    except (KeyboardInterrupt, StopIteration) as e:
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print()

    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(
            vars, model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
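An illustrative sketch, not part of the original source, of the flattening step this newer find_MAP relies on: `DictToArrayBijection.map` turns a point dict into a `RaveledVars` whose flat `.data` array is handed to scipy, while `.point_map_info` lets `rmap` rebuild the dict. The snippet assumes `model`, `Point`, `DictToArrayBijection`, and `RaveledVars` are available as in the function above.

# Hypothetical sketch mirroring the mapping calls used in find_MAP above.
start = Point(model.initial_point, model=model)
x0 = DictToArrayBijection.map(start)        # RaveledVars(data=..., point_map_info=...)
flat = x0.data                              # 1-D array passed to scipy.optimize.minimize
roundtrip = DictToArrayBijection.rmap(x0)   # back to a {variable name: value} dict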