def test_nested_initvals(self):
    # See issue #5168
    with pm.Model() as pmodel:
        one = pm.LogNormal("one", mu=np.log(1), sigma=1e-5, initval="prior")
        two = pm.LogNormal("two", mu=np.log(one * 2), sigma=1e-5, initval="prior")
        three = pm.LogNormal("three", mu=np.log(two * 2), sigma=1e-5, initval="prior")
        four = pm.LogNormal("four", mu=np.log(three * 2), sigma=1e-5, initval="prior")
        five = pm.LogNormal("five", mu=np.log(four * 2), sigma=1e-5, initval="prior")
        six = pm.LogNormal("six", mu=np.log(five * 2), sigma=1e-5, initval="prior")

    ip_vals = list(make_initial_point_fn(model=pmodel, return_transformed=True)(0).values())
    assert np.allclose(np.exp(ip_vals), [1, 2, 4, 8, 16, 32], rtol=1e-3)

    ip_vals = list(make_initial_point_fn(model=pmodel, return_transformed=False)(0).values())
    assert np.allclose(ip_vals, [1, 2, 4, 8, 16, 32], rtol=1e-3)

    # Overriding one initial value must propagate to the downstream initvals.
    pmodel.initial_values[four] = 1

    ip_vals = list(make_initial_point_fn(model=pmodel, return_transformed=True)(0).values())
    assert np.allclose(np.exp(ip_vals), [1, 2, 4, 1, 2, 4], rtol=1e-3)

    ip_vals = list(make_initial_point_fn(model=pmodel, return_transformed=False)(0).values())
    assert np.allclose(ip_vals, [1, 2, 4, 1, 2, 4], rtol=1e-3)
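# A minimal standalone sketch of the `initval="prior"` mechanism exercised above.
# This assumes the development-era pymc v4 API in which `make_initial_point_fn`
# lives in `pymc.initial_point`; the toy model is illustrative only.
import numpy as np
import pymc as pm
from pymc.initial_point import make_initial_point_fn

with pm.Model() as m:
    # With a tiny sigma, an initval drawn from the prior is essentially exp(mu).
    x = pm.LogNormal("x", mu=np.log(3), sigma=1e-5, initval="prior")

ipfn = make_initial_point_fn(model=m, return_transformed=False)
print(ipfn(0))  # approximately {"x": 3.0}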
def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
    if trace is None:
        if size is None:
            raise opvi.ParametrizationError("Need `trace` or `size` to initialize")
        else:
            ipfn = make_initial_point_fn(
                model=self.model,
                overrides=start,
                jitter_rvs={},
                return_transformed=True,
            )
            start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
            start = pm.floatX(DictToArrayBijection.map(start))
            # Initialize particles as jittered copies of the initial point
            histogram = np.tile(start, (size, 1))
            histogram += pm.floatX(np.random.normal(0, jitter, histogram.shape))
    else:
        # Build the histogram from the posterior draws in the trace
        histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = DictToArrayBijection.map(trace.point(j, t))
                i += 1
    return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
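# For context, a hypothetical toy model reaching the empirical-group
# create_shared_params above through the public `pm.fit` entry point
# (SVGD builds its particle histogram from the initial point plus jitter):
import pymc as pm

with pm.Model():
    x = pm.Normal("x", 0.0, 1.0)
    approx = pm.fit(n=300, method="svgd")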
def test_simulator_moment(mu, sigma, size):
    def normal_sim(rng, mu, sigma, size):
        return rng.normal(mu, sigma, size=size)

    with Model() as model:
        x = Simulator("x", normal_sim, mu, sigma, size=size)

    fn = make_initial_point_fn(
        model=model,
        return_transformed=False,
        default_strategy="moment",
    )

    random_draw = model["x"].eval()
    result = fn(0)["x"]
    assert result.shape == random_draw.shape

    # We perform a z-test between the moment and the expected mean of a sample of 10 draws.
    # This test fails if the number of samples averaged in get_moment(Simulator)
    # is much smaller than 10, but would not catch the case where the number of samples
    # is higher than the expected 10.
    n = 10  # samples
    expected_sample_mean = mu
    expected_sample_mean_std = np.sqrt(sigma**2 / n)

    # Multiple-test adjustment for the z-test, to maintain alpha=0.01
    alpha = 0.01
    alpha /= 2 * 2 * 3  # Correct for the number of test permutations
    alpha /= random_draw.size  # Correct for the size of the distribution

    cutoff = st.norm().ppf(1 - (alpha / 2))
    assert np.all(np.abs((result - expected_sample_mean) / expected_sample_mean_std) < cutoff)
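# The cutoff arithmetic above is a Bonferroni-style correction: alpha is split
# across the parametrize grid (assumed here: 2 mus x 2 sigmas x 3 sizes) and
# across the draw size, then the two-sided z critical value is taken.
# A minimal sketch with assumed values:
import scipy.stats as st

alpha = 0.01
alpha /= 2 * 2 * 3  # assumed 12 test parametrizations
alpha /= 4          # assumed draw size of 4
cutoff = st.norm().ppf(1 - alpha / 2)  # two-sided critical value, about 3.7 here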
def test_untransformed_initial_point(self):
    with pm.Model() as pmodel:
        pm.Flat("A", initval="moment")
        pm.HalfFlat("B", initval="moment")
    fn = make_initial_point_fn(model=pmodel, jitter_rvs={}, return_transformed=False)
    iv = fn(0)
    assert iv["A"] == 0
    assert iv["B"] == 1
def assert_moment_is_expected(model, expected):
    fn = make_initial_point_fn(
        model=model,
        return_transformed=False,
        default_strategy="moment",
    )
    result = fn(0)["x"]
    expected = np.asarray(expected)
    try:
        random_draw = model["x"].eval()
    except NotImplementedError:
        # Distributions without a random implementation only need to match shapes
        random_draw = result
    assert result.shape == expected.shape == random_draw.shape
    assert np.allclose(result, expected)
def test_respects_overrides(self):
    with pm.Model() as pmodel:
        A = pm.Flat("A", initval="moment")
        B = pm.HalfFlat("B", initval=4)
        C = pm.Normal("C", mu=A + B, initval="moment")
    fn = make_initial_point_fn(
        model=pmodel,
        jitter_rvs={},
        return_transformed=True,
        overrides={
            A: at.as_tensor(2, dtype=int),
            B: 3,
            C: 5,
        },
    )
    iv = fn(0)
    assert iv["A"] == 2
    assert np.isclose(iv["B_log__"], np.log(3))
    assert iv["C"] == 5
def assert_moment_is_expected(model, expected, check_finite_logp=True):
    fn = make_initial_point_fn(
        model=model,
        return_transformed=False,
        default_strategy="moment",
    )
    moment = fn(0)["x"]
    expected = np.asarray(expected)
    try:
        random_draw = model["x"].eval()
    except NotImplementedError:
        random_draw = moment
    assert moment.shape == expected.shape == random_draw.shape
    assert np.allclose(moment, expected)

    if check_finite_logp:
        logp_moment = logpt(model["x"], at.constant(moment), transformed=False).eval()
        assert np.isfinite(logp_moment)
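# A hypothetical use of the helper above, checking that a Normal's moment is
# its mean (toy values; the variable must be named "x" for the helper):
import numpy as np
import pymc as pm

with pm.Model() as model:
    pm.Normal("x", mu=3.0, sigma=2.0)
assert_moment_is_expected(model, expected=np.array(3.0))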
def create_shared_params(self, start=None):
    ipfn = make_initial_point_fn(
        model=self.model,
        overrides=start,
        jitter_rvs={},
        return_transformed=True,
    )
    start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
    if self.batched:
        start = start[self.group[0].name][0]
    else:
        start = DictToArrayBijection.map(start)
    rho = np.zeros((self.ddim,))
    if self.batched:
        start = np.tile(start, (self.bdim, 1))
        rho = np.tile(rho, (self.bdim, 1))
    return {
        "mu": aesara.shared(pm.floatX(start), "mu"),
        "rho": aesara.shared(pm.floatX(rho), "rho"),
    }
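# As with the empirical group, a hypothetical toy model reaching this
# mean-field create_shared_params through `pm.fit`:
import numpy as np
import pymc as pm

with pm.Model():
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("obs", mu=mu, sigma=1.0, observed=np.random.randn(20))
    approx = pm.fit(n=1000, method="advi")  # mean-field: shared "mu" and "rho"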
def test_string_overrides_work(self):
    with pm.Model() as pmodel:
        A = pm.Flat("A", initval=10)
        B = pm.HalfFlat("B", initval=10)
        C = pm.HalfFlat("C", initval=10)
    fn = make_initial_point_fn(
        model=pmodel,
        jitter_rvs={},
        return_transformed=True,
        overrides={
            "A": 1,
            "B": 1,
            "C_log__": 0,
        },
    )
    iv = fn(0)
    assert iv["A"] == 1
    assert np.isclose(iv["B_log__"], 0)
    assert iv["C_log__"] == 0
def create_shared_params(self, start=None):
    ipfn = make_initial_point_fn(
        model=self.model,
        overrides=start,
        jitter_rvs={},
        return_transformed=True,
    )
    start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
    if self.batched:
        start = start[self.group[0].name][0]
    else:
        start = DictToArrayBijection.map(start)
    n = self.ddim
    L_tril = np.eye(n)[np.tril_indices(n)].astype(aesara.config.floatX)
    if self.batched:
        start = np.tile(start, (self.bdim, 1))
        L_tril = np.tile(L_tril, (self.bdim, 1))
    return {"mu": aesara.shared(start, "mu"), "L_tril": aesara.shared(L_tril, "L_tril")}
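# Design note: the full-rank variant is selected with method="fullrank_advi"
# in the same `pm.fit` call. Because L_tril starts as the lower triangle of
# the identity matrix, the initial approximation is a unit-variance Gaussian
# centered at the model's initial point.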
def test_adds_jitter(self):
    with pm.Model() as pmodel:
        A = pm.Flat("A", initval="moment")
        B = pm.HalfFlat("B", initval="moment")
        C = pm.Normal("C", mu=A + B, initval="moment")
    fn = make_initial_point_fn(model=pmodel, jitter_rvs={B}, return_transformed=True)
    iv = fn(0)
    # The moment of the Flat is 0.
    assert iv["A"] == 0
    # The moment of the HalfFlat is 1, but the HalfFlat is log-transformed by default,
    # so the transformed initial value with jitter is zero plus a jitter from [-1, 1].
    b_transformed = iv["B_log__"]
    b_untransformed = transform_back(B, b_transformed)
    assert b_transformed != 0
    assert -1 < b_transformed < 1
    # C is centered on 0 + the untransformed initval of B.
    assert np.isclose(iv["C"], np.array(0 + b_untransformed, dtype=aesara.config.floatX))
    # The jitter respects seeding.
    assert fn(0) == fn(0)
    assert fn(0) != fn(1)
def find_MAP(
    start=None,
    vars=None,
    method="L-BFGS-B",
    return_raw=False,
    include_transformed=True,
    progressbar=True,
    maxeval=5000,
    model=None,
    *args,
    seed: Optional[int] = None,
    **kwargs,
):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc.sample()`` and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete variables are
        specified in `vars`, then `Powell` which will perform better). For instructions
        on use of a callable, refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler, but this is not
    an effective way of choosing starting values for sampling. As a result, we have
    greatly enhanced the initialization of NUTS and wrapped it inside ``pymc.sample()``,
    so you should avoid this method.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)
    ipfn = make_initial_point_fn(
        model=model,
        jitter_rvs={},
        return_transformed=True,
        overrides=start,
    )
    if seed is None:
        seed = model.rng_seeder.randint(2**30, dtype=np.int64)
    start = ipfn(seed)
    model.check_start_vals(start)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars(x, x0.point_map_info))

    try:
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
                RaveledVars(x, x0.point_map_info)
            )

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available. "
            + "(E.g. vars contains discrete variables). MAP "
            + "estimates may not be accurate for the default "
            + "parameters. Defaulting to non-gradient minimization "
            + "'Powell'."
        )
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func, dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(
            cost_func, x0.data, method=method, jac=compute_gradient, *args, **kwargs
        )
        mx0 = opt_result["x"]  # r -> opt_result
    except (KeyboardInterrupt, StopIteration) as e:
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print(file=sys.stdout)

    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
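# A minimal usage sketch of find_MAP on a toy conjugate model (values are
# illustrative; with a N(0, 1) prior and unit observation noise, the MAP of
# mu is data.sum() / (len(data) + 1)):
import numpy as np
import pymc as pm

data = np.array([0.1, -0.3, 0.4, 0.2])
with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sigma=1.0)
    pm.Normal("obs", mu=mu, sigma=1.0, observed=data)
    map_estimate = pm.find_MAP()  # e.g. {"mu": array(0.08)}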