def step(self, point):
    for name, shared_var in self.shared.items():
        shared_var.set_value(point[name])

    q = DictToArrayBijection.map({v.name: point[v.name] for v in self.vars})

    step_res = self.astep(q)

    if self.generates_stats:
        apoint, stats = step_res
    else:
        apoint = step_res

    if not isinstance(apoint, RaveledVars):
        # We assume that the mapping has stayed the same
        apoint = RaveledVars(apoint, q.point_map_info)

    new_point = DictToArrayBijection.rmap(apoint, start_point=point)

    if self.generates_stats:
        return new_point, stats

    return new_point
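# A minimal illustrative sketch (not part of the original code) of the
# map/rmap round trip that `step` above relies on: `DictToArrayBijection.map`
# ravels a {name: value} point into one flat array plus per-variable metadata,
# and `rmap` unravels it back into a dict. It assumes `DictToArrayBijection`
# is importable from `pymc.blocking`, as in recent PyMC versions.
import numpy as np
from pymc.blocking import DictToArrayBijection

example_point = {"mu": np.zeros(3), "sigma_log__": np.array(0.5)}
raveled = DictToArrayBijection.map(example_point)
# `raveled.data` is the flat array; `raveled.point_map_info` keeps
# (name, shape, dtype) for each variable so the point can be rebuilt.
assert raveled.data.shape == (4,)
roundtrip = DictToArrayBijection.rmap(raveled)
assert set(roundtrip) == set(example_point)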
def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
    if trace is None:
        if size is None:
            raise opvi.ParametrizationError("Need `trace` or `size` to initialize")
        else:
            ipfn = make_initial_point_fn(
                model=self.model,
                overrides=start,
                jitter_rvs={},
                return_transformed=True,
            )
            start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
            # `map` returns a RaveledVars; its `.data` field holds the raveled array
            start = pm.floatX(DictToArrayBijection.map(start).data)
            # Initialize particles
            histogram = np.tile(start, (size, 1))
            histogram += pm.floatX(np.random.normal(0, jitter, histogram.shape))
    else:
        histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = DictToArrayBijection.map(trace.point(j, t)).data
                i += 1
    return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
def step(self, point: PointType):
    partial_funcs_and_point = [DictToArrayBijection.mapf(x, start_point=point) for x in self.fs]
    if self.allvars:
        partial_funcs_and_point.append(point)

    apoint = DictToArrayBijection.map({v.name: point[v.name] for v in self.vars})
    step_res = self.astep(apoint, *partial_funcs_and_point)

    if self.generates_stats:
        apoint_new, stats = step_res
    else:
        apoint_new = step_res

    if not isinstance(apoint_new, RaveledVars):
        # We assume that the mapping has stayed the same
        apoint_new = RaveledVars(apoint_new, apoint.point_map_info)

    point_new = DictToArrayBijection.rmap(apoint_new, start_point=point)

    if self.generates_stats:
        return point_new, stats

    return point_new
def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
    if trace is None:
        if size is None:
            raise opvi.ParametrizationError("Need `trace` or `size` to initialize")
        else:
            start = self._prepare_start(start)
            # Initialize particles
            histogram = np.tile(start, (size, 1))
            histogram += pm.floatX(np.random.normal(0, jitter, histogram.shape))
    else:
        histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = DictToArrayBijection.map(trace.point(j, t)).data
                i += 1
    return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
def test_leapfrog_reversible():
    n = 3
    np.random.seed(42)
    start, model, _ = models.non_normal(n)
    size = sum(start[n.name].size for n in model.value_vars)
    scaling = floatX(np.random.rand(size))

    class HMC(BaseHMC):
        def _hamiltonian_step(self, *args, **kwargs):
            pass

    step = HMC(vars=model.value_vars, model=model, scaling=scaling)

    step.integrator._logp_dlogp_func.set_extra_values({})
    astart = DictToArrayBijection.map(start)
    p = RaveledVars(floatX(step.potential.random()), astart.point_map_info)
    q = RaveledVars(floatX(np.random.randn(size)), astart.point_map_info)
    start = step.integrator.compute_state(p, q)
    for epsilon in [0.01, 0.1]:
        for n_steps in [1, 2, 3, 4, 20]:
            state = start
            for _ in range(n_steps):
                state = step.integrator.step(epsilon, state)
            for _ in range(n_steps):
                state = step.integrator.step(-epsilon, state)
            npt.assert_allclose(state.q.data, start.q.data, rtol=1e-5)
            npt.assert_allclose(state.p.data, start.p.data, rtol=1e-5)
def astep(self, q0: RaveledVars) -> Tuple[RaveledVars, List[Dict[str, Any]]]:
    point_map_info = q0.point_map_info
    q0 = q0.data

    if not self.steps_until_tune and self.tune:
        if self.tune == "scaling":
            self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
        elif self.tune == "lambda":
            self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
        # Reset counter
        self.steps_until_tune = self.tune_interval
        self.accepted = 0

    epsilon = self.proposal_dist() * self.scaling

    # differential evolution proposal
    # select two other chains
    ir1, ir2 = np.random.choice(self.other_chains, 2, replace=False)
    r1 = DictToArrayBijection.map(self.population[ir1])
    r2 = DictToArrayBijection.map(self.population[ir2])
    # propose a jump
    q = floatX(q0 + self.lamb * (r1.data - r2.data) + epsilon)

    accept = self.delta_logp(q, q0)
    q_new, accepted = metrop_select(accept, q, q0)
    self.accepted += accepted

    self.steps_until_tune -= 1

    stats = {
        "tune": self.tune,
        "scaling": self.scaling,
        "lambda": self.lamb,
        "accept": np.exp(accept),
        "accepted": accepted,
    }

    q_new = RaveledVars(q_new, point_map_info)

    return q_new, [stats]
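# An illustrative sketch (not from the original source) of the differential
# evolution proposal used above: jump along the difference between two other
# chains plus small symmetric noise, q = q0 + lambda * (r1 - r2) + epsilon.
import numpy as np

rng = np.random.default_rng(0)
q0 = rng.normal(size=5)               # current raveled position of this chain
r1, r2 = rng.normal(size=(2, 5))      # raveled positions of two other chains
lamb = 2.38 / np.sqrt(2 * q0.size)    # a common DE scaling choice (an assumption here)
epsilon = rng.normal(scale=1e-4, size=5)
q_proposed = q0 + lamb * (r1 - r2) + epsilon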
def _initialize_kernel(self):
    """Create variables and logp function necessary to run the kernel.

    This method should not be overwritten. If needed, use `setup_kernel`
    instead.
    """
    # Create dictionary that stores original variables shape and size
    initial_point = self.model.recompute_initial_point(seed=self.rng.integers(2**30))
    for v in self.variables:
        self.var_info[v.name] = (initial_point[v.name].shape, initial_point[v.name].size)

    # Create particles bijection map
    if self.start:
        init_rnd = self.start
    else:
        init_rnd = self.initialize_population()

    population = []
    for i in range(self.draws):
        point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model)
        population.append(DictToArrayBijection.map(point).data)

    self.tempered_posterior = np.array(floatX(population))

    # Initialize prior and likelihood log probabilities
    shared = make_shared_replacements(initial_point, self.variables, self.model)

    self.prior_logp_func = _logp_forw(initial_point, [self.model.varlogpt], self.variables, shared)
    self.likelihood_logp_func = _logp_forw(
        initial_point, [self.model.datalogpt], self.variables, shared
    )

    priors = [self.prior_logp_func(sample) for sample in self.tempered_posterior]
    likelihoods = [self.likelihood_logp_func(sample) for sample in self.tempered_posterior]

    self.prior_logp = np.array(priors).squeeze()
    self.likelihood_logp = np.array(likelihoods).squeeze()
def test_missing_data(self):
    # Originally from a case described in #3122
    X = np.random.binomial(1, 0.5, 10)
    X[0] = -1  # masked a single value
    X = np.ma.masked_values(X, value=-1)
    with pm.Model() as m:
        x1 = pm.Uniform("x1", 0.0, 1.0)
        x2 = pm.Bernoulli("x2", x1, observed=X)

    gf = m.logp_dlogp_function()
    gf._extra_are_set = True

    assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type

    pnt = m.test_point.copy()
    del pnt["x2_missing"]

    res = [gf(DictToArrayBijection.map(Point(pnt, model=m))) for i in range(5)]

    assert reduce(lambda x, y: np.array_equal(x, y) and y, res) is not False
def create_shared_params(self, start=None):
    ipfn = make_initial_point_fn(
        model=self.model,
        overrides=start,
        jitter_rvs={},
        return_transformed=True,
    )
    start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
    if self.batched:
        start = start[self.group[0].name][0]
    else:
        start = DictToArrayBijection.map(start)
    rho = np.zeros((self.ddim,))
    if self.batched:
        start = np.tile(start, (self.bdim, 1))
        rho = np.tile(rho, (self.bdim, 1))
    return {
        "mu": aesara.shared(pm.floatX(start), "mu"),
        "rho": aesara.shared(pm.floatX(rho), "rho"),
    }
def fixed_hessian(point, vars=None, model=None):
    """
    Returns a fixed Hessian for any chain location.

    Parameters
    ----------
    model: Model (optional if in `with` context)
    point: dict
    vars: list
        Variables for which the Hessian is to be calculated.
    """
    model = modelcontext(model)
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    point = Point(point, model=model)

    # `map` returns a RaveledVars; use the size of its raveled `.data` array
    rval = np.ones(DictToArrayBijection.map(point).data.size) / 10
    return rval
def create_shared_params(self, start=None):
    ipfn = make_initial_point_fn(
        model=self.model,
        overrides=start,
        jitter_rvs={},
        return_transformed=True,
    )
    start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
    if self.batched:
        start = start[self.group[0].name][0]
    else:
        start = DictToArrayBijection.map(start)
    n = self.ddim
    L_tril = np.eye(n)[np.tril_indices(n)].astype(aesara.config.floatX)
    if self.batched:
        start = np.tile(start, (self.bdim, 1))
        L_tril = np.tile(L_tril, (self.bdim, 1))
    return {"mu": aesara.shared(start, "mu"), "L_tril": aesara.shared(L_tril, "L_tril")}
def astep(self, q0):
    """Perform a single HMC iteration."""
    perf_start = time.perf_counter()
    process_start = time.process_time()

    p0 = self.potential.random()
    p0 = RaveledVars(p0, q0.point_map_info)

    start = self.integrator.compute_state(q0, p0)

    if not np.isfinite(start.energy):
        model = self._model
        check_test_point = model.point_logps()
        error_logp = check_test_point.loc[
            (np.abs(check_test_point) >= 1e20) | np.isnan(check_test_point)
        ]
        self.potential.raise_ok(q0.point_map_info)
        message_energy = (
            "Bad initial energy, check any log probabilities that "
            "are inf or -inf, nan or very small:\n{}".format(error_logp.to_string())
        )
        warning = SamplerWarning(
            WarningType.BAD_ENERGY,
            message_energy,
            "critical",
            self.iter_count,
        )
        self._warnings.append(warning)
        raise SamplingError("Bad initial energy")

    adapt_step = self.tune and self.adapt_step_size
    step_size = self.step_adapt.current(adapt_step)
    self.step_size = step_size

    if self._step_rand is not None:
        step_size = self._step_rand(step_size)

    hmc_step = self._hamiltonian_step(start, p0.data, step_size)

    perf_end = time.perf_counter()
    process_end = time.process_time()

    self.step_adapt.update(hmc_step.accept_stat, adapt_step)
    self.potential.update(hmc_step.end.q, hmc_step.end.q_grad, self.tune)
    if hmc_step.divergence_info:
        info = hmc_step.divergence_info
        point = None
        point_dest = None
        info_store = None
        if self.tune:
            kind = WarningType.TUNING_DIVERGENCE
        else:
            kind = WarningType.DIVERGENCE
            self._num_divs_sample += 1
            # We don't want to fill up all memory with divergence info
            if self._num_divs_sample < 100 and info.state is not None:
                point = DictToArrayBijection.rmap(info.state.q)
            if self._num_divs_sample < 100 and info.state_div is not None:
                point_dest = DictToArrayBijection.rmap(info.state_div.q)
            if self._num_divs_sample < 100:
                info_store = info
        warning = SamplerWarning(
            kind,
            info.message,
            "debug",
            self.iter_count,
            info.exec_info,
            divergence_point_source=point,
            divergence_point_dest=point_dest,
            divergence_info=info_store,
        )
        self._warnings.append(warning)

    self.iter_count += 1
    if not self.tune:
        self._samples_after_tune += 1

    stats = {
        "tune": self.tune,
        "diverging": bool(hmc_step.divergence_info),
        "perf_counter_diff": perf_end - perf_start,
        "process_time_diff": process_end - process_start,
        "perf_counter_start": perf_start,
    }

    stats.update(hmc_step.stats)
    stats.update(self.step_adapt.stats())

    return hmc_step.end.q, [stats]
def __call__(self, q0: RaveledVars) -> RaveledVars:
    """Returns proposed sample given the current sample in dictionary form (q0_dict)."""
    # Logging is reduced to avoid extensive console output
    # during multiple recursive calls of subsample()
    _log = logging.getLogger("pymc")
    _log.setLevel(logging.ERROR)

    # Convert current sample from RaveledVars ->
    # dict before feeding to subsample.
    q0_dict = DictToArrayBijection.rmap(q0)

    with self.model_below:
        # Check if the tuning flag has been set to False
        # in which case tuning is stopped. The flag is set
        # to False (by MLDA's astep) when the burn-in
        # iterations of the highest-level MLDA sampler run out.
        # The change propagates to all levels.
        if self.tune:
            # Subsample in tuning mode
            trace = subsample(
                draws=0,
                step=self.step_method_below,
                start=q0_dict,
                tune=self.subsampling_rate,
            )
        else:
            # Subsample in normal mode without tuning
            # If DEMetropolisZMLDA is the base sampler a flag is raised to
            # make sure that history is edited after tuning ends
            if self.tuning_end_trigger:
                if isinstance(self.step_method_below, DEMetropolisZMLDA):
                    self.step_method_below.tuning_end_trigger = True
                self.tuning_end_trigger = False

            trace = subsample(
                draws=self.subsampling_rate,
                step=self.step_method_below,
                start=q0_dict,
                tune=0,
            )

    # set logging back to normal
    _log.setLevel(logging.NOTSET)

    # return sample with index self.subchain_selection from the generated
    # sequence of length self.subsampling_rate. The index is set within
    # MLDA's astep() function
    q_dict = trace.point(self.subchain_selection)

    # Make sure output dict is ordered the same way as the input dict.
    q_dict = Point(
        {key: q_dict[key] for key in q0_dict.keys()},
        model=self.model_below,
        filter_model_vars=True,
    )

    return DictToArrayBijection.map(q_dict)
def find_MAP(
    start=None,
    vars=None,
    method="L-BFGS-B",
    return_raw=False,
    include_transformed=True,
    progressbar=True,
    maxeval=5000,
    model=None,
    *args,
    seed: Optional[int] = None,
    **kwargs,
):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc.sample()`` and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete variables are
        specified in `vars`, then `Powell` which will perform better). For instructions
        on use of a callable, refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler, but this is not
    an effective way of choosing starting values for sampling. As a result, we have greatly
    enhanced the initialization of NUTS and wrapped it inside ``pymc.sample()`` and you
    should thus avoid this method.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)
    ipfn = make_initial_point_fn(
        model=model,
        jitter_rvs={},
        return_transformed=True,
        overrides=start,
    )
    if seed is None:
        seed = model.rng_seeder.randint(2**30, dtype=np.int64)
    start = ipfn(seed)
    model.check_start_vals(start)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars(x, x0.point_map_info))

    try:
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
                RaveledVars(x, x0.point_map_info)
            )

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available."
            + "(E.g. vars contains discrete variables). MAP "
            + "estimates may not be accurate for the default "
            + "parameters. Defaulting to non-gradient minimization "
            + "'Powell'."
        )
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func, dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(
            cost_func, x0.data, method=method, jac=compute_gradient, *args, **kwargs
        )
        mx0 = opt_result["x"]  # r -> opt_result
    except (KeyboardInterrupt, StopIteration) as e:
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print(file=sys.stdout)

    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
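# A small usage sketch (an assumed example, not from the original source) for
# the public API defined above: `find_MAP` is typically called inside a model
# context and returns a dict mapping variable names to their MAP estimates.
import numpy as np
import pymc as pm

if __name__ == "__main__":
    observed = np.random.normal(loc=1.0, scale=2.0, size=100)
    with pm.Model():
        mu = pm.Normal("mu", 0.0, 10.0)
        sigma = pm.HalfNormal("sigma", 5.0)
        pm.Normal("obs", mu=mu, sigma=sigma, observed=observed)
        # Includes transformed variables (e.g. a log-transformed sigma) by default
        map_estimate = pm.find_MAP()
    print(map_estimate)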
def __init__(
    self,
    draws: int,
    tune: int,
    step_method,
    step_method_pickled,
    chain: int,
    seed,
    start: Dict[str, np.ndarray],
    mp_ctx,
):
    self.chain = chain
    process_name = "worker_chain_%s" % chain
    self._msg_pipe, remote_conn = multiprocessing.Pipe()

    self._shared_point = {}
    self._point = {}

    for name, shape, dtype in DictToArrayBijection.map(start).point_map_info:
        size = 1
        for dim in shape:
            size *= int(dim)
        size *= dtype.itemsize
        if size != ctypes.c_size_t(size).value:
            raise ValueError("Variable %s is too large" % name)

        array = mp_ctx.RawArray("c", size)
        self._shared_point[name] = (array, shape, dtype)
        array_np = np.frombuffer(array, dtype).reshape(shape)
        array_np[...] = start[name]
        self._point[name] = array_np

    self._readable = True
    self._num_samples = 0

    if step_method_pickled is not None:
        step_method_send = step_method_pickled
    else:
        if mp_ctx.get_start_method() == "spawn":
            raise ValueError(
                "please provide a pre-pickled step method when multiprocessing start method is 'spawn'"
            )
        step_method_send = step_method

    self._process = mp_ctx.Process(
        daemon=True,
        name=process_name,
        target=_run_process,
        args=(
            process_name,
            remote_conn,
            step_method_send,
            step_method_pickled is not None,
            self._shared_point,
            draws,
            tune,
            seed,
        ),
    )
    self._process.start()
    # Close the remote pipe, so that we get notified if the other
    # end is closed.
    remote_conn.close()