def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
    """Build the shared ``histogram`` of particles for an empirical
    approximation, either from an existing trace or from jittered copies
    of a starting point.

    Parameters
    ----------
    trace : trace object, optional
        If given, every draw of every chain becomes one particle row.
    size : int, optional
        Number of particles to create when no trace is given.
    jitter : float
        Std. dev. of Gaussian noise added to the replicated start point.
    start : dict, optional
        Starting point; defaults to the model's initial point.

    Raises
    ------
    opvi.ParametrizationError
        If neither ``trace`` nor ``size`` is provided.
    """
    if trace is None:
        if size is None:
            raise opvi.ParametrizationError(
                "Need `trace` or `size` to initialize")
        else:
            if start is None:
                start = self.model.initial_point
            else:
                # Merge user-provided values into a copy of the defaults.
                start_ = self.model.initial_point.copy()
                self.model.update_start_vals(start_, start)
                start = start_
            start = pm.floatX(DictToArrayBijection.map(start))
            # Initialize particles: replicate the start point `size` times
            # and jitter each copy so particles are not identical.
            histogram = np.tile(start, (size, 1))
            histogram += pm.floatX(
                np.random.normal(0, jitter, histogram.shape))
    else:
        # One row per (draw, chain) pair, flattened via the bijection.
        histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = DictToArrayBijection.map(trace.point(j, t))
                i += 1
    return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
def step(self, point):
    """Run one sampler step starting from the dict ``point``.

    Builds a bijection for the current point, wraps the helper functions
    in ``self.fs`` so they accept raveled arrays, calls ``astep`` and maps
    the result back to a dict (with stats when the sampler emits them).
    """
    bij = DictToArrayBijection(self.ordering, point)

    inputs = [bij.mapf(fn) for fn in self.fs]
    if self.allvars:
        inputs.append(point)

    result = self.astep(bij.map(point), *inputs)

    if self.generates_stats:
        apoint, stats = result
        return bij.rmap(apoint), stats

    return bij.rmap(result)
def step(self, point):
    """Take one sampler step: push ``point`` into the shared variables,
    run ``astep`` on the raveled value-variable array, and unravel the
    result back into a point dict.
    """
    # Side effect: update every shared variable before evaluating astep,
    # so the compiled logp functions see the current point.
    for name, shared_var in self.shared.items():
        shared_var.set_value(point[name])

    # Ravel only the variables this step method samples.
    q = DictToArrayBijection.map(
        {v.name: point[v.name] for v in self.vars})

    step_res = self.astep(q)

    if self.generates_stats:
        apoint, stats = step_res
    else:
        apoint = step_res

    if not isinstance(apoint, RaveledVars):
        # We assume that the mapping has stayed the same
        apoint = RaveledVars(apoint, q.point_map_info)

    # start_point=point keeps variables not in self.vars unchanged.
    new_point = DictToArrayBijection.rmap(apoint, start_point=point)

    if self.generates_stats:
        return new_point, stats

    return new_point
def test_leapfrog_reversible():
    """Integrating forward then backward (negative step size) with the
    leapfrog integrator must return (q, p) to the starting state up to
    floating-point tolerance.
    """
    n = 3
    np.random.seed(42)  # deterministic start point and momenta
    start, model, _ = models.non_normal(n)
    size = sum(start[n.name].size for n in model.value_vars)
    scaling = floatX(np.random.rand(size))

    class HMC(BaseHMC):
        # Stub: only the integrator is exercised, not the HMC proposal.
        def _hamiltonian_step(self, *args, **kwargs):
            pass

    step = HMC(vars=model.value_vars, model=model, scaling=scaling)
    step.integrator._logp_dlogp_func.set_extra_values({})
    astart = DictToArrayBijection.map(start)
    p = RaveledVars(floatX(step.potential.random()), astart.point_map_info)
    q = RaveledVars(floatX(np.random.randn(size)), astart.point_map_info)
    start = step.integrator.compute_state(p, q)
    for epsilon in [0.01, 0.1]:
        for n_steps in [1, 2, 3, 4, 20]:
            state = start
            for _ in range(n_steps):
                state = step.integrator.step(epsilon, state)
            # A negative step size runs the trajectory in reverse.
            for _ in range(n_steps):
                state = step.integrator.step(-epsilon, state)
            npt.assert_allclose(state.q.data, start.q.data, rtol=1e-5)
            npt.assert_allclose(state.p.data, start.p.data, rtol=1e-5)
def test_leapfrog_reversible_single():
    """Single-step integrators must be reversible: integrating forward,
    flipping the momentum, and integrating the same number of steps again
    must land back on the starting (q, p).
    """
    n = 3
    start, model, _ = models.non_normal(n)
    integrators = ['leapfrog', 'two-stage', 'three-stage']
    steps = [BaseHMC(vars=model.vars, model=model, integrator=method,
                     use_single_leapfrog=True) for method in integrators]
    for method, step in zip(integrators, steps):
        bij = DictToArrayBijection(step.ordering, start)
        q0 = bij.map(start)
        p0 = np.ones(n) * .05
        for epsilon in [0.01, 0.1, 1.2]:
            for n_steps in [1, 2, 3, 4, 20]:
                dlogp0 = step.dlogp(q0)
                q, p = q0, p0
                dlogp = dlogp0
                # NOTE(review): `energy` is computed but never used —
                # looks like a leftover; confirm before removing.
                energy = step.compute_energy(q, p)
                for _ in range(n_steps):
                    q, p, v, dlogp, _ = step.leapfrog(q, p, dlogp,
                                                      np.array(epsilon))
                p = -p  # flip momentum to reverse the trajectory
                for _ in range(n_steps):
                    q, p, v, dlogp, _ = step.leapfrog(q, p, dlogp,
                                                      np.array(epsilon))
                close_to(q, q0, 1e-8, str(('q', method, n_steps, epsilon)))
                close_to(-p, p0, 1e-8, str(('p', method, n_steps, epsilon)))
def test_leapfrog_reversible_single():
    """Float32-aware variant of the single-step reversibility test:
    forward integration, momentum flip, and a second forward pass must
    recover the initial (q, p) within a precision chosen per dtype.
    """
    n = 3
    start, model, _ = models.non_normal(n)
    integrators = ['leapfrog', 'two-stage', 'three-stage']
    steps = [BaseHMC(vars=model.vars, model=model, integrator=method,
                     use_single_leapfrog=True) for method in integrators]
    for method, step in zip(integrators, steps):
        bij = DictToArrayBijection(step.ordering, start)
        q0 = bij.map(start)
        p0 = floatX(np.ones(n) * .05)
        # Looser tolerance when aesara/theano runs in float32 mode.
        precision = select_by_precision(float64=1E-8, float32=1E-5)
        for epsilon in [0.01, 0.1, 1.2]:
            for n_steps in [1, 2, 3, 4, 20]:
                dlogp0 = step.dlogp(q0)
                q, p = q0, p0
                dlogp = dlogp0
                # NOTE(review): `energy` is computed but never used —
                # looks like a leftover; confirm before removing.
                energy = step.compute_energy(q, p)
                for _ in range(n_steps):
                    q, p, v, dlogp, _ = step.leapfrog(q, p, dlogp,
                                                      floatX(np.array(epsilon)))
                p = -p  # flip momentum to reverse the trajectory
                for _ in range(n_steps):
                    q, p, v, dlogp, _ = step.leapfrog(q, p, dlogp,
                                                      floatX(np.array(epsilon)))
                close_to(q, q0, precision, str(('q', method, n_steps, epsilon)))
                close_to(-p, p0, precision, str(('p', method, n_steps, epsilon)))
class PyMC3Potential:
    """Callable wrapper around a PyMC3 model that evaluates the negative
    log probability and its gradient at a flat coordinate vector.
    """

    def __init__(self, vars=None, model=None, point=None):
        self.model = pm.modelcontext(model)

        # Work out the full starting coordinates
        if point is None:
            point = self.model.test_point
        else:
            # Fill any missing variables in from the model defaults.
            pm.util.update_start_vals(point, self.model.test_point, self.model)

        # Fit all the parameters by default
        if vars is None:
            vars = self.model.cont_vars
        self.vars = inputvars(vars)
        allinmodel(self.vars, self.model)

        # Work out the relevant bijection map
        point = Point(point, model=self.model)
        self.bijection = DictToArrayBijection(ArrayOrdering(self.vars), point)

        # Pre-compile the theano model and gradient
        nlp = -self.model.logpt
        grad = theano.grad(nlp, self.vars, disconnected_inputs="ignore")
        self.func = get_theano_function_for_var([nlp] + grad, model=self.model)

    def __call__(self, coords):
        """Return ``(neg_logp, grad)`` at the flat vector ``coords``."""
        res = self.func(*get_args_for_theano_function(
            self.bijection.rmap(coords), model=self.model))
        # res[0] is the objective; the remaining entries are per-variable
        # gradients, raveled back into one flat array.
        d = dict(zip((v.name for v in self.vars), res[1:]))
        g = self.bijection.map(d)
        return res[0], g
def initialize_population(self):
    """Create an initial population from the prior distribution."""
    population = []
    var_info = OrderedDict()
    if self.start is None:
        # Draw the initial particles from the prior predictive.
        init_rnd = sample_prior_predictive(
            self.draws,
            var_names=[v.name for v in self.model.unobserved_RVs],
            model=self.model,
        )
    else:
        init_rnd = self.start

    init = self.model.initial_point

    # Record each variable's shape/size so raveled samples can be
    # unraveled later.
    for v in self.variables:
        var_info[v.name] = (init[v.name].shape, init[v.name].size)

    # One raveled particle per draw.
    for i in range(self.draws):
        point = Point(
            {v.name: init_rnd[v.name][i] for v in self.variables},
            model=self.model)
        population.append(DictToArrayBijection.map(point).data)

    self.posterior = np.array(floatX(population))
    self.var_info = var_info
def step(self, point: PointType):
    """Advance the sampler by one step starting from ``point``.

    Ravels the sampled variables into an array, runs ``astep`` with the
    partially-applied functions from ``self.fs``, and unravels the result
    back into a point dict (plus stats when the sampler produces them).
    """
    extra_args = [
        DictToArrayBijection.mapf(fn, start_point=point) for fn in self.fs
    ]
    if self.allvars:
        extra_args.append(point)

    value_dict = {var.name: point[var.name] for var in self.vars}
    raveled = DictToArrayBijection.map(value_dict)

    result = self.astep(raveled, *extra_args)

    if self.generates_stats:
        raveled_new, stats = result
    else:
        raveled_new = result

    if not isinstance(raveled_new, RaveledVars):
        # The variable-to-array mapping is assumed unchanged by astep.
        raveled_new = RaveledVars(raveled_new, raveled.point_map_info)

    point_new = DictToArrayBijection.rmap(raveled_new, start_point=point)

    if self.generates_stats:
        return point_new, stats

    return point_new
def astep(self, q0: RaveledVars) -> Tuple[RaveledVars, List[Dict[str, Any]]]:
    """One differential-evolution Metropolis step.

    Proposes ``q0 + lambda * (r1 - r2) + epsilon``, where ``r1`` and
    ``r2`` are the current states of two other randomly chosen chains,
    then accepts or rejects with a Metropolis ratio.

    Returns
    -------
    (RaveledVars, list of dict)
        The new raveled state and a one-element list of sampler stats.
    """
    point_map_info = q0.point_map_info
    q0 = q0.data
    if not self.steps_until_tune and self.tune:
        # Tune either the proposal scaling or the DE jump factor lambda,
        # based on the acceptance rate over the last tuning interval.
        if self.tune == "scaling":
            self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
        elif self.tune == "lambda":
            self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
        # Reset counter
        self.steps_until_tune = self.tune_interval
        self.accepted = 0

    epsilon = self.proposal_dist() * self.scaling

    # differential evolution proposal
    # select two other chains
    ir1, ir2 = np.random.choice(self.other_chains, 2, replace=False)
    r1 = DictToArrayBijection.map(self.population[ir1])
    r2 = DictToArrayBijection.map(self.population[ir2])
    # propose a jump
    q = floatX(q0 + self.lamb * (r1.data - r2.data) + epsilon)

    accept = self.delta_logp(q, q0)
    q_new, accepted = metrop_select(accept, q, q0)
    self.accepted += accepted

    self.steps_until_tune -= 1

    stats = {
        "tune": self.tune,
        "scaling": self.scaling,
        "lambda": self.lamb,
        "accept": np.exp(accept),
        "accepted": accepted,
    }

    q_new = RaveledVars(q_new, point_map_info)

    return q_new, [stats]
class ArrayStepShared(BlockedStep):
    """Faster version of ArrayStep that requires the substep method that does not wrap
       the functions the step method uses.

    Works by setting shared variables before using the step. This eliminates the mapping
    and unmapping overhead as well as moving fewer variables around.
    """

    def __init__(self, vars, shared, blocked=True):
        """
        Parameters
        ----------
        vars: list of sampling variables
        shared: dict of aesara variable -> shared variable
        blocked: Boolean (default True)
        """
        self.vars = vars
        self.ordering = ArrayOrdering(vars)
        # Key shared variables by name so step() can look them up by point key.
        self.shared = {
            get_var_name(var): shared for var, shared in shared.items()
        }
        self.blocked = blocked
        # Bijection is (re)built per step, once the current point is known.
        self.bij = None

    def step(self, point):
        # Side effect: refresh shared variables before running astep.
        for var, share in self.shared.items():
            share.set_value(point[var])

        self.bij = DictToArrayBijection(self.ordering, point)

        if self.generates_stats:
            apoint, stats = self.astep(self.bij.map(point))
            return self.bij.rmap(apoint), stats
        else:
            apoint = self.astep(self.bij.map(point))
            return self.bij.rmap(apoint)
def test_leapfrog_reversible():
    """Leapfrog integration run forward and then with flipped momentum
    must land back on the initial phase-space point (dtype-aware tol)."""
    dim = 3
    start, model, _ = models.non_normal(dim)
    step = BaseHMC(vars=model.vars, model=model)
    bij = DictToArrayBijection(step.ordering, start)
    q_init = bij.map(start)
    p_init = floatX(np.ones(dim) * .05)
    tol = select_by_precision(float64=1E-8, float32=1E-4)
    for eps in [.01, .1, 1.2]:
        for length in [1, 2, 3, 4, 20]:
            eps_arr = floatX(np.array(eps))
            len_arr = np.array(length, dtype='int32')
            q, p, _ = step.leapfrog(q_init, p_init, eps_arr, len_arr)
            # Flip the momentum and integrate the same number of steps back.
            q, p, _ = step.leapfrog(q, -p, eps_arr, len_arr)
            close_to(q, q_init, tol, str((length, eps)))
            close_to(-p, p_init, tol, str((length, eps)))
class ArrayStepSharedLLK(BlockedStep):
    """
    Modified ArrayStepShared To handle returned larger point including the likelihood values.
    Takes additionally a list of output vars including the likelihoods.

    Parameters
    ----------
    vars : list
        variables to be sampled
    out_vars : list
        variables to be stored in the traces
    shared : dict
        theano variable -> shared variables
    blocked : boolen
        (default True)
    """

    def __init__(self, vars, out_vars, shared, blocked=True):
        self.vars = vars
        self.ordering = ArrayOrdering(vars)
        # Separate ordering for the (larger) output point incl. likelihoods.
        self.lordering = ListArrayOrdering(out_vars, intype='tensor')
        lpoint = [var.tag.test_value for var in out_vars]
        self.shared = {var.name: shared for var, shared in shared.items()}
        self.blocked = blocked
        # NOTE(review): `self.population` is not assigned anywhere in this
        # class; presumably a subclass sets it before calling this
        # __init__ — confirm, otherwise this line raises AttributeError.
        self.bij = DictToArrayBijection(self.ordering, self.population[0])

        # Output-only variables (e.g. likelihoods) are excluded from the
        # sampled-variable mapping.
        blacklist = list(
            set(self.lordering.variables) - set([var.name for var in vars]))

        self.lij = ListToArrayBijection(self.lordering, lpoint, blacklist=blacklist)

    def __getstate__(self):
        # Plain dict state keeps the step method picklable for multiprocessing.
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__.update(state)

    def step(self, point):
        # Side effect: write current values into the compiled functions'
        # shared storage before running astep.
        for var, share in self.shared.items():
            share.container.storage[0] = point[var]

        apoint, alist = self.astep(self.bij.map(point))

        return self.bij.rmap(apoint), alist
def test_leapfrog_reversible():
    """Forward leapfrog followed by a momentum-flipped leapfrog of equal
    length must recover the initial position and momentum."""
    dim = 3
    start, model, _ = models.non_normal(dim)
    step = BaseHMC(vars=model.vars, model=model)
    bij = DictToArrayBijection(step.ordering, start)
    q_init = bij.map(start)
    p_init = np.ones(dim) * .05
    for eps in [.01, .1, 1.2]:
        for length in [1, 2, 3, 4, 20]:
            len_arr = np.array(length, dtype='int32')
            q, p, _ = step.leapfrog(q_init, p_init, np.array(eps), len_arr)
            # Flip the momentum and integrate the same number of steps back.
            q, p, _ = step.leapfrog(q, -p, np.array(eps), len_arr)
            close_to(q, q_init, 1e-8, str((length, eps)))
            close_to(-p, p_init, 1e-8, str((length, eps)))
def create_shared_params(self, start=None):
    """Build the shared ``mu``/``rho`` parameters for a mean-field group.

    ``start`` defaults to the model's initial point; user-supplied values
    are completed with the model defaults first.
    """
    if start is None:
        start = self.model.initial_point
    else:
        # Complete a user-supplied start with the model defaults.
        start_ = start.copy()
        self.model.update_start_vals(start_, self.model.initial_point)
        start = start_
    if self.batched:
        # Use the first row of the batched group variable as the start.
        start = start[self.group[0].name][0]
    else:
        start = DictToArrayBijection.map(start)
    rho = np.zeros((self.ddim, ))
    if self.batched:
        # Replicate mean and rho across the batch dimension.
        start = np.tile(start, (self.bdim, 1))
        rho = np.tile(rho, (self.bdim, 1))
    return {
        "mu": aesara.shared(pm.floatX(start), "mu"),
        "rho": aesara.shared(pm.floatX(rho), "rho"),
    }
def create_shared_params(self, start=None):
    """Build the shared ``mu``/``L_tril`` parameters for a full-rank group.

    ``L_tril`` holds the flattened lower-triangular entries of an
    identity matrix (i.e. an identity Cholesky factor) as the initial
    covariance parameterization.
    """
    if start is None:
        start = self.model.initial_point
    else:
        # Complete a user-supplied start with the model defaults.
        start_ = start.copy()
        self.model.update_start_vals(start_, self.model.initial_point)
        start = start_
    if self.batched:
        # Use the first row of the batched group variable as the start.
        start = start[self.group[0].name][0]
    else:
        start = DictToArrayBijection.map(start)
    n = self.ddim
    L_tril = np.eye(n)[np.tril_indices(n)].astype(aesara.config.floatX)
    if self.batched:
        # Replicate mean and Cholesky factor across the batch dimension.
        start = np.tile(start, (self.bdim, 1))
        L_tril = np.tile(L_tril, (self.bdim, 1))
    # NOTE(review): unlike the mean-field variant, `start` is not passed
    # through pm.floatX here — confirm this is intended in float32 mode.
    return {
        "mu": aesara.shared(start, "mu"),
        "L_tril": aesara.shared(L_tril, "L_tril")
    }
def test_missing_data(self):
    """Repeated evaluation of logp/dlogp with a masked (missing) observed
    value must be deterministic and keep consistent shared-variable types.
    """
    # Originally from a case described in #3122
    X = np.random.binomial(1, 0.5, 10)
    X[0] = -1  # masked a single value
    X = np.ma.masked_values(X, value=-1)
    with pm.Model() as m:
        x1 = pm.Uniform("x1", 0.0, 1.0)
        x2 = pm.Bernoulli("x2", x1, observed=X)

    gf = m.logp_dlogp_function()
    gf._extra_are_set = True

    # The auto-imputed missing variable must share its type with the
    # function's extra shared variable.
    assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type

    pnt = m.test_point.copy()
    del pnt["x2_missing"]

    # Evaluating repeatedly at the same point must give identical results.
    res = [gf(DictToArrayBijection.map(Point(pnt, model=m))) for i in range(5)]

    assert reduce(lambda x, y: np.array_equal(x, y) and y, res) is not False
class ModelWrapper:
    """Callable wrapper around a PyMC3 model returning ``(neg_logp, grad)``
    for a flat parameter vector, for use with external optimizers.
    """

    def __init__(self, start=None, vars=None, model=None):
        model = self.model = pm.modelcontext(model)

        # Work out the full starting coordinates
        if start is None:
            start = model.test_point
        else:
            # Fill any missing variables in from the model defaults.
            update_start_vals(start, model.test_point, model)
        self.start = start

        # Fit all the parameters by default
        if vars is None:
            vars = model.cont_vars
        vars = self.vars = inputvars(vars)
        allinmodel(vars, model)

        # Work out the relevant bijection map
        start = Point(start, model=model)
        self.bij = DictToArrayBijection(ArrayOrdering(vars), start)

        # Pre-compile the theano model and gradient
        nlp = -model.logpt
        grad = theano.grad(nlp, vars, disconnected_inputs="ignore")
        self.func = get_theano_function_for_var([nlp] + grad, model=model)

    def __call__(self, vec):
        """Return ``(neg_logp, grad)`` at the flat vector ``vec``."""
        try:
            res = self.func(*get_args_for_theano_function(self.bij.rmap(vec),
                                                          model=self.model))
        except Exception:
            # Dump the offending point before re-raising, to aid debugging
            # failed evaluations inside an optimizer loop.
            import traceback
            print("array:", vec)
            print("point:", self.bij.rmap(vec))
            traceback.print_exc()
            raise

        # res[0] is the objective; the rest are per-variable gradients,
        # raveled back into one flat array.
        d = dict(zip((v.name for v in self.vars), res[1:]))
        g = self.bij.map(d)
        return res[0], g
def fixed_hessian(point, vars=None, model=None):
    """
    Returns a fixed Hessian for any chain location.

    Parameters
    ----------
    model: Model (optional if in `with` context)
    point: dict
    vars: list
        Variables for which Hessian is to be calculated.
    """

    model = modelcontext(model)
    if vars is None:
        vars = model.cont_vars
    # NOTE(review): `vars` is validated but not used below — the returned
    # size covers the full point, not just `vars`. Confirm this is intended.
    vars = inputvars(vars)

    point = Point(point, model=model)

    # Constant diagonal "Hessian" of 0.1 per raveled parameter.
    rval = np.ones(DictToArrayBijection.map(point).size) / 10
    return rval
def find_MAP(start=None, vars=None, method="L-BFGS-B", return_raw=False,
             include_transformed=True, progressbar=True, maxeval=5000,
             model=None, *args, **kwargs):
    """
    Finds the local maximum a posteriori point given a model.

    find_MAP should not be used to initialize the NUTS sampler. Simply call
    pymc3.sample() and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.test_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all
        continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete
        variables are specified in `vars`, then `Powell` which will perform
        better). For instructions on use of a callable, refer to SciPy's
        documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize
        (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used find_MAP() to initialize the NUTS sampler, but
    this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside pymc3.sample() and you should thus avoid this method.
    """
    model = modelcontext(model)

    if start is None:
        start = model.test_point
    else:
        # Fill missing variables in from the model defaults.
        update_start_vals(start, model.test_point, model)
    check_start_vals(start, model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)
    logp_func = bij.mapf(model.fastlogp_nojac)
    x0 = bij.map(start)

    try:
        # Gradient may be unavailable (e.g. models with discrete variables).
        dlogp_func = bij.mapf(model.fastdlogp_nojac(vars))
        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available." +
            "(E.g. vars contains discrete variables). MAP " +
            "estimates may not be accurate for the default " +
            "parameters. Defaulting to non-gradient minimization " +
            "'Powell'.")
        method = "Powell"

    if "fmin" in kwargs:
        # Deprecated path: user supplied a scipy fmin-style callable.
        fmin = kwargs.pop("fmin")
        warnings.warn(
            "In future versions, set the optimization algorithm with a string. "
            'For example, use `method="L-BFGS-B"` instead of '
            '`fmin=sp.optimize.fmin_l_bfgs_b"`.')

        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        # Check to see if minimization function actually uses the gradient
        if "fprime" in getargspec(fmin).args:

            def grad_logp(point):
                return nan_to_num(-dlogp_func(point))

            opt_result = fmin(cost_func, x0, fprime=grad_logp, *args, **kwargs)
        else:
            # Check to see if minimization function uses a starting value
            if "x0" in getargspec(fmin).args:
                opt_result = fmin(cost_func, x0, *args, **kwargs)
            else:
                opt_result = fmin(cost_func, *args, **kwargs)

        if isinstance(opt_result, tuple):
            mx0 = opt_result[0]
        else:
            mx0 = opt_result
    else:
        # remove 'if' part, keep just this 'else' block after version change
        if compute_gradient:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                        dlogp_func)
        else:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        try:
            opt_result = minimize(cost_func, x0, method=method,
                                  jac=compute_gradient, *args, **kwargs)
            mx0 = opt_result["x"]  # r -> opt_result
        except (KeyboardInterrupt, StopIteration) as e:
            # User interrupt or maxeval reached: fall back to the last
            # evaluated point.
            mx0, opt_result = cost_func.previous_x, None
            if isinstance(e, StopIteration):
                pm._log.info(e)
        finally:
            last_v = cost_func.n_eval
            if progressbar:
                assert isinstance(cost_func.progress, ProgressBar)
                cost_func.progress.total = last_v
                cost_func.progress.update(last_v)
                print()

    vars = get_default_varnames(model.unobserved_RVs, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
class MCMCInterface:
    """
    An interface for using a ``pymc3`` model with a plain vanilla MCMC
    sampler.

    Args:
        model (optional): The ``pymc3`` model. If ``None`` (default), uses
            the current model on the stack.
    """

    def __init__(self, model=None):
        # Get the model
        self.model = pm.modelcontext(model)

        # Get the variables
        self.varnames = get_default_varnames(self.model.unobserved_RVs, False)

        # Get the starting point
        self.start = Point(self.model.test_point, model=self.model)
        # NOTE(review): this is the number of free variables, not the
        # number of scalar dimensions — confirm callers expect that.
        self.ndim = len(self.start)
        self.mean = None
        self.cov = None

        # Compile the log probability function
        self.vars = inputvars(self.model.cont_vars)
        self.bij = DictToArrayBijection(ArrayOrdering(self.vars), self.start)
        self.func = get_theano_function_for_var(
            self.model.logpt, model=self.model
        )

    def optimize(self, **kwargs):
        """
        Maximize the log probability of a ``pymc3`` model.

        This routine wraps ``pymc3_ext.optimize``, which in turn wraps the
        ``scipy.optimize.minimize`` function. This method accepts any of the
        keywords accepted by either of those two functions.

        Returns:
            The array of parameter values at the optimum point.

        """
        self.map_soln, self.info = optimize(
            model=self.model, return_info=True, **kwargs
        )
        # Cache the MAP point and inverse-Hessian for get_initial_state.
        self.mean = self.info["x"]
        self.cov = self.info["hess_inv"]
        return self.mean

    def get_initial_state(
        self, nwalkers=30, var=None, check_finite=True, max_tries=100
    ):
        """
        Generate random initial points for sampling.

        If the ``optimize`` method was called beforehand, this method returns
        samples from a multidimensional Gaussian centered on the maximum a
        posteriori (MAP) solution with covariance equal to the inverse of the
        Hessian matrix at that point, unless ``var`` is provided, in which
        case that is used instead. If the optimizer was not called, this
        method returns samples from a Gaussian with mean equal to the model's
        test point (``model.test_point``) and variance equal to ``var``.

        Args:
            var (float, array, or matrix, optional): Variance of the
                multidimensional Gaussian used to draw samples. This
                quantity is optional if ``optimize`` was called beforehand,
                otherwise it must be provided. Default is ``None``.

        Returns:
            An array of shape ``(nwalkers, ndim)`` where ``ndim`` is the
            number of free model parameters.

        """
        if var is None:
            if self.mean is not None and self.cov is not None:
                # User ran `optimize`, so let's sample from
                # the Laplacian approximation at the MAP point
                mean = self.mean
                cov = self.cov
            else:
                raise ValueError(
                    "Please provide a variance `var`, or run `optimize` before calling this method."
                )
        else:
            if self.mean is not None:
                # User ran `optimize`, so let's sample around
                # the MAP point
                mean = self.mean
            else:
                # Sample around the test value
                mean = self.bij.map(self.start)
            cov = var * np.eye(len(mean))

        # Sample from the Gaussian
        p0 = np.random.multivariate_normal(mean, cov, size=nwalkers)

        # Ensure the log probability is finite everywhere
        if check_finite:
            for k in range(nwalkers):
                n = 0
                while not np.isfinite(self.logp(p0[k])):
                    # Re-draw this walker until logp is finite (bounded tries).
                    if n > max_tries:
                        raise ValueError(
                            "Unable to initialize walkers at a point with finite `logp`. "
                            "Try reducing `var` or running `optimize()`."
                        )
                    p0[k] = np.random.multivariate_normal(mean, cov)

        return p0

    def logp(self, x):
        """
        Return the log probability evaluated at a point.

        Args:
            x (array): The array of parameter values.

        Returns:
            The value of the log probability function evaluated at ``x``.

        """
        try:
            res = self.func(
                *get_args_for_theano_function(
                    self.bij.rmap(x), model=self.model
                )
            )
        except Exception:
            # Dump the offending point before re-raising, to aid debugging.
            import traceback

            print("array:", x)
            print("point:", self.bij.rmap(x))
            traceback.print_exc()
            raise
        return res

    def transform(self, samples, varnames=None, progress=True):
        """
        Transform samples from the internal to the user parametrization.

        Args:
            samples (array or matrix): The set of points to transform.
            varnames (list, optional): The names of the parameters to
                transform to. These may either be strings or the actual
                ``pymc3`` model variables. If ``None`` (default), these are
                determined automatically and may be accessed as the
                ``varnames`` attribute of this class.
            progress (bool, optional): Display a progress bar? Default
                ``True``.

        Returns:
            An array of shape ``(..., len(varnames))``, where ``... =
            samples.shape[:-1]``, containing the transformed samples.

        """
        is_1d = len(np.shape(samples)) == 1
        samples = np.atleast_2d(samples)
        if varnames is None:
            varnames = self.varnames
        varnames = [v.name if not type(v) is str else v for v in varnames]
        shape = list(samples.shape)
        shape[-1] = len(varnames)
        x = np.zeros(shape)
        for k in tqdm(range(len(samples)), disable=not progress):
            point = pmx.optim.get_point(self, samples[k])
            for j, name in enumerate(varnames):
                x[k, j] = point[name]
        if is_1d:
            return x.flatten()
        else:
            return x
def astep(self, q0):
    """One MLDA step, given current sample q0.

    Runs the recursive delayed-acceptance proposal on the level below,
    evaluates the MLDA acceptance log-ratio, accepts/rejects with
    ``metrop_select``, and collects variance-reduction and tuning stats.

    Returns
    -------
    (q_new, stats_list)
        The (possibly unchanged) raveled sample and a list of stats
        dicts: this level's stats followed by base-level tuning stats.
    """
    # Check if the tuning flag has been changed and if yes,
    # change the proposal's tuning flag and reset self.accepted
    # This is triggered by _iter_sample while the highest-level MLDA step
    # method is running. It then propagates to all levels.
    if self.proposal_dist.tune != self.tune:
        self.proposal_dist.tune = self.tune
        # set tune in sub-methods of compound stepper explicitly because
        # it is not set within sample.py (only the CompoundStep's tune flag is)
        if isinstance(self.step_method_below, CompoundStep):
            for method in self.step_method_below.methods:
                method.tune = self.tune
        self.accepted = 0

    # Convert current sample from numpy array ->
    # dict before feeding to proposal
    q0_dict = DictToArrayBijection.rmap(q0)

    # Set subchain_selection (which sample from the coarse chain
    # is passed as a proposal to the fine chain). If variance
    # reduction is used, a random sample is selected as proposal.
    # If variance reduction is not used, the last sample is
    # selected as proposal.
    if self.variance_reduction:
        self.subchain_selection = np.random.randint(0, self.subsampling_rate)
    else:
        self.subchain_selection = self.subsampling_rate - 1
    self.proposal_dist.subchain_selection = self.subchain_selection

    # Call the recursive DA proposal to get proposed sample
    # and convert dict -> numpy array
    pre_q = self.proposal_dist(q0_dict)
    q = DictToArrayBijection.map(pre_q)

    # Evaluate MLDA acceptance log-ratio
    # If proposed sample from lower levels is the same as current one,
    # do not calculate likelihood, just set accept to 0.0
    if (q.data == q0.data).all():
        # FIX: use the builtin `float` — `np.float` is a deprecated alias
        # of it and was removed in NumPy 1.24.
        accept = float(0.0)
        skipped_logp = True
    else:
        accept = self.delta_logp(q.data, q0.data) + self.delta_logp_below(q0.data, q.data)
        skipped_logp = False

    # Accept/reject sample - next sample is stored in q_new
    q_new, accepted = metrop_select(accept, q, q0)
    if skipped_logp:
        accepted = False

    # if sample is accepted, update self.Q_last with the sample's Q value
    # runs only for VR or when store_Q_fine is True
    if self.variance_reduction or self.store_Q_fine:
        if accepted and not skipped_logp:
            self.Q_last = self.model.Q.get_value()

    # Variance reduction
    if self.variance_reduction:
        self.update_vr_variables(accepted, skipped_logp)

    # Adaptive error model - runs only during tuning.
    if self.tune and self.adaptive_error_model:
        self.update_error_estimate(accepted, skipped_logp)

    # Update acceptance counter
    self.accepted += accepted

    stats = {"tune": self.tune, "accept": np.exp(accept), "accepted": accepted}

    # Save the VR statistics to the stats dictionary (only happens in the
    # top MLDA level)
    if (self.variance_reduction or self.store_Q_fine) and not self.is_child:
        q_stats = {}
        if self.variance_reduction:
            m = self
            for level in range(self.num_levels - 1, 0, -1):
                # save the Q differences for this level and iteration
                q_stats[f"Q_{level}_{level - 1}"] = np.array(m.Q_diff)
                # this makes sure Q_diff is reset for
                # the next iteration
                m.Q_diff = []
                if level == 1:
                    break
                m = m.step_method_below
            q_stats["Q_0"] = np.array(m.Q_base_full)
            m.Q_base_full = []
        if self.store_Q_fine:
            q_stats["Q_" + str(self.num_levels - 1)] = np.array(self.Q_last)
        stats = {**stats, **q_stats}

    # Capture the base tuning stats from the level below.
    self.base_tuning_stats = []

    if isinstance(self.step_method_below, MLDA):
        self.base_tuning_stats = self.step_method_below.base_tuning_stats
    elif isinstance(self.step_method_below, MetropolisMLDA):
        self.base_tuning_stats.append({"base_scaling": self.step_method_below.scaling[0]})
    elif isinstance(self.step_method_below, DEMetropolisZMLDA):
        self.base_tuning_stats.append(
            {
                "base_scaling": self.step_method_below.scaling[0],
                "base_lambda": self.step_method_below.lamb,
            }
        )
    elif isinstance(self.step_method_below, CompoundStep):
        # Below method is CompoundStep
        for method in self.step_method_below.methods:
            if isinstance(method, MetropolisMLDA):
                self.base_tuning_stats.append({"base_scaling": method.scaling[0]})
            elif isinstance(method, DEMetropolisZMLDA):
                self.base_tuning_stats.append(
                    {"base_scaling": method.scaling[0], "base_lambda": method.lamb}
                )

    return q_new, [stats] + self.base_tuning_stats
def __init__(
    self,
    draws: int,
    tune: int,
    step_method,
    step_method_pickled,
    chain: int,
    seed,
    start,
    mp_ctx,
    pickle_backend,
):
    """Spawn a worker process that samples one chain, sharing the current
    point with the parent through per-variable raw shared-memory buffers.
    """
    self.chain = chain
    process_name = "worker_chain_%s" % chain
    self._msg_pipe, remote_conn = multiprocessing.Pipe()

    self._shared_point = {}
    self._point = {}

    # Allocate one RawArray per variable, sized from the raveled start
    # point's (name, shape, dtype) metadata.
    for name, shape, dtype in DictToArrayBijection.map(start).point_map_info:
        size = 1
        for dim in shape:
            size *= int(dim)
        size *= dtype.itemsize
        # Guard against sizes that overflow size_t on this platform.
        if size != ctypes.c_size_t(size).value:
            raise ValueError("Variable %s is too large" % name)

        array = mp_ctx.RawArray("c", size)
        self._shared_point[name] = (array, shape, dtype)
        # View the raw buffer as an ndarray and seed it with the start value.
        array_np = np.frombuffer(array, dtype).reshape(shape)
        array_np[...] = start[name]
        self._point[name] = array_np

    self._readable = True
    self._num_samples = 0

    # Prefer a pre-pickled step method when one was provided.
    if step_method_pickled is not None:
        step_method_send = step_method_pickled
    else:
        step_method_send = step_method

    self._process = mp_ctx.Process(
        daemon=True,
        name=process_name,
        target=_run_process,
        args=(
            process_name,
            remote_conn,
            step_method_send,
            step_method_pickled is not None,
            self._shared_point,
            draws,
            tune,
            seed,
            pickle_backend,
        ),
    )
    self._process.start()
    # Close the remote pipe, so that we get notified if the other
    # end is closed.
    remote_conn.close()
def optimize(start=None, vars=None, model=None, return_info=False,
             verbose=True, **kwargs):
    """Maximize the log prob of a PyMC3 model using scipy

    All extra arguments are passed directly to the ``scipy.optimize.minimize``
    function.

    Args:
        start: The PyMC3 coordinate dictionary of the starting position
        vars: The variables to optimize
        model: The PyMC3 model
        return_info: Return both the coordinate dictionary and the result of
            ``scipy.optimize.minimize``
        verbose: Print the success flag and log probability to the screen

    """
    from scipy.optimize import minimize

    model = pm.modelcontext(model)

    # Work out the full starting coordinates
    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)

    # Fit all the parameters by default
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    allinmodel(vars, model)

    # Work out the relevant bijection map
    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)

    # Pre-compile the theano model and gradient
    nlp = -model.logpt
    grad = theano.grad(nlp, vars, disconnected_inputs="ignore")
    func = get_theano_function_for_var([nlp] + grad, model=model)

    if verbose:
        names = [
            get_untransformed_name(v.name)
            if is_transformed_name(v.name) else v.name for v in vars
        ]
        sys.stderr.write("optimizing logp for variables: [{0}]\n".format(
            ", ".join(names)))
        bar = tqdm.tqdm()

    # This returns the objective function and its derivatives
    def objective(vec):
        res = func(*get_args_for_theano_function(bij.rmap(vec), model=model))
        d = dict(zip((v.name for v in vars), res[1:]))
        g = bij.map(d)
        if verbose:
            bar.set_postfix(logp="{0:e}".format(-res[0]))
            bar.update()
        return res[0], g

    # Optimize using scipy.optimize
    x0 = bij.map(start)
    initial = objective(x0)[0]
    kwargs["jac"] = True
    info = minimize(objective, x0, **kwargs)

    # Only accept the output if it is better than it was
    x = info.x if (np.isfinite(info.fun) and info.fun < initial) else x0

    # Coerce the output into the right format
    vars = get_default_varnames(model.unobserved_RVs, True)
    point = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(bij.rmap(x)))
    }

    if verbose:
        bar.close()
        sys.stderr.write("message: {0}\n".format(info.message))
        sys.stderr.write("logp: {0} -> {1}\n".format(-initial, -info.fun))
        if not np.isfinite(info.fun):
            logger.warning("final logp not finite, returning initial point")
            logger.warning(
                "this suggests that something is wrong with the model")
            logger.debug("{0}".format(info))

    if return_info:
        return point, info
    return point
def find_MAP(start=None, vars=None, method="L-BFGS-B", return_raw=False,
             include_transformed=True, progressbar=True, maxeval=5000,
             model=None, *args, **kwargs):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc3.sample()`` and it will automatically initialize NUTS in a better
    way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all
        continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete
        variables are specified in `vars`, then `Powell` which will perform
        better). For instructions on use of a callable, refer to SciPy's
        documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize
        (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler, but
    this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside ``pymc3.sample()`` and you should thus avoid this
    method.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    # Copy so the caller's start dict is not mutated below.
    start = copy.deepcopy(start)
    if start is None:
        start = model.initial_point
    else:
        model.update_start_vals(start, model.initial_point)
    model.check_start_vals(start)

    start = Point(start, model=model)
    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars(
            x, x0.point_map_info))

    try:
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
                RaveledVars(x, x0.point_map_info))

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available." +
            "(E.g. vars contains discrete variables). MAP " +
            "estimates may not be accurate for the default " +
            "parameters. Defaulting to non-gradient minimization " +
            "'Powell'.")
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                    dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(cost_func, x0.data, method=method,
                              jac=compute_gradient, *args, **kwargs)
        mx0 = opt_result["x"]  # r -> opt_result
    except (KeyboardInterrupt, StopIteration) as e:
        # User interrupt or maxeval reached: fall back to the last
        # evaluated point.
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print()

    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars,
                                include_transformed)
    mx = {
        var.name: value
        for var, value in zip(
            vars, model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx