def run(n_samples=3000):
    model = build_model()
    start = model.test_point
    h = pm.find_hessian(start, model=model)
    step = pm.Metropolis(model.vars, h, blocked=True, model=model)
    trace = pm.sample(n_samples, step, start, model=model)
    return trace
def run(n_samples=3000):
    model = build_model()
    start = model.test_point
    h = pm.find_hessian(start, model=model)
    step = pm.Metropolis(model.vars, h, blocked=True, model=model)
    trace = pm.sample(n_samples, step=step, start=start, model=model)
    return trace
def init_nuts(init='advi', n_init=500000, model=None, **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed is
    extremely dependent on the choice of mass/scaling matrix. In our
    experience, using ADVI to estimate a diagonal covariance matrix and using
    this as the scaling matrix produces robust results over a wide class of
    continuous models.

    Parameters
    ----------
    init : str {'advi', 'advi_map', 'map', 'nuts'}
        Initialization method to use.
        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * advi_map : Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point.
        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
    n_init : int
        Number of iterations of the initializer.
        If 'advi', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init)
        start = pm.variational.sample_vp(v_params, 1, progressbar=False)[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init, start=start)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
        cov = pm.trace_cov(init_trace[n_init // 2:])
        start = {varname: np.mean(init_trace[varname])
                 for varname in init_trace.varnames}
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
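A minimal usage sketch (my own addition, not from the source; the toy model, iteration count, and draw count are illustrative): the returned start point and step method are meant to be handed to `pm.sample`.

# Hypothetical driver code for the helper above; model and counts are made up.
import numpy as np
import pymc3 as pm

with pm.Model() as toy_model:
    mu = pm.Normal('mu', mu=0., sd=10.)
    pm.Normal('obs', mu=mu, sd=1., observed=np.random.randn(50))

    start, step = init_nuts(init='advi', n_init=20000)
    trace = pm.sample(1000, step=step, start=start)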
def _laplace(model):
    """Fit a model using a Laplace approximation.

    Mainly for pedagogical use; ``mcmc`` and ``advi`` are better approximations.

    Parameters
    ----------
    model : PyMC3 model

    Returns
    -------
    Dictionary whose keys are the names of the variables and whose values are
    tuples of modes and standard deviations.
    """
    with model:
        varis = [v for v in model.unobserved_RVs
                 if not pm.util.is_transformed_name(v.name)]
        maps = pm.find_MAP(start=model.test_point, vars=varis)
        hessian = pm.find_hessian(maps, vars=varis)
        if np.linalg.det(hessian) == 0:
            raise np.linalg.LinAlgError("Singular matrix. Use mcmc or advi method")
        stds = np.diag(np.linalg.inv(hessian) ** 0.5)
        maps = [v for (k, v) in maps.items()
                if not pm.util.is_transformed_name(k)]
        modes = [v.item() if v.size == 1 else v for v in maps]
        names = [v.name for v in varis]
        shapes = [np.atleast_1d(mode).shape for mode in modes]
        stds_reshaped = []
        idx0 = 0
        for shape in shapes:
            idx1 = idx0 + sum(shape)
            stds_reshaped.append(np.reshape(stds[idx0:idx1], shape))
            idx0 = idx1
    return dict(zip(names, zip(modes, stds_reshaped)))
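A usage sketch (assumed, not part of the source): on a toy model with a single free parameter, `_laplace` returns a dict mapping each untransformed variable name to a `(mode, standard deviation)` pair.

# Hypothetical example; the data and model below are illustrative only.
import numpy as np
import pymc3 as pm

np.random.seed(0)
observed = np.random.normal(loc=2.0, scale=1.0, size=200)

with pm.Model() as toy_model:
    mu = pm.Normal('mu', mu=0., sd=10.)
    pm.Normal('y', mu=mu, sd=1., observed=observed)

mode, std = _laplace(toy_model)['mu']  # approximate posterior mode and sd of mu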
def test_plots_multidimensional():
    # Test single trace
    from .models import multidimensional_model

    start, model, _ = multidimensional_model()
    with model as model:
        h = np.diag(find_hessian(start))
        step = Metropolis(model.vars, h)
        trace = sample(3000, step, start)

    traceplot(trace)
def test_plots():
    # Test single trace
    from pymc3.examples import arbitrary_stochastic as asmod

    with asmod.model as model:
        start = model.test_point
        h = find_hessian(start)
        step = Metropolis(model.vars, h)
        trace = sample(3000, step, start)

        traceplot(trace)
        forestplot(trace)
        autocorrplot(trace)
def make_normal_approx(vars):
    '''Get the normal approximation to the posterior near its maximum for the
    specified variables. Useful for quick summaries without sampling, e.g. for
    linear models. Don't use for complex models!

    Parameters:
        vars: a list of model variables

    Returns:
        a DataFrame with the MAP estimates, standard deviations, and
        compatible intervals estimated from the Hessian
    '''
    map_est = pm.find_MAP()
    std_est = (1 / pm.find_hessian(map_est, vars=vars)) ** 0.5
    data = []
    for var in vars:
        i = vars.index(var)
        cv = sp.stats.norm.ppf(0.97)
        data.append([map_est[var.name].round(3),
                     std_est[i, i].round(3),
                     (map_est[var.name] - std_est[i, i] * cv).round(3),
                     (map_est[var.name] + std_est[i, i] * cv).round(3)])
    summary = pd.DataFrame(data, columns=['map', 'sd', 'hdi_3%', 'hdi_97%'],
                           index=vars)
    return summary
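A usage sketch (assumed; the data, priors, and variable names are illustrative). The function expects the model variables themselves, since it reads `var.name` from each, and it should be called inside the model context so that `pm.find_MAP` can find the model.

# Hypothetical usage inside a model context; everything below is made up.
import numpy as np
import pymc3 as pm

x_data = np.linspace(0., 1., 50)
y_data = 1.0 + 2.5 * x_data + np.random.normal(0., 0.3, size=50)

with pm.Model() as linear_model:
    intercept = pm.Normal('intercept', 0., 10.)
    slope = pm.Normal('slope', 0., 10.)
    pm.Normal('y', mu=intercept + slope * x_data, sd=0.3, observed=y_data)
    print(make_normal_approx([intercept, slope]))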
def test_leapfrog_reversible():
    n = 3
    start, model, _ = models.non_normal(n)
    with model:
        h = pm.find_hessian(start, model=model)
        step = pm.HamiltonianMC(model.vars, h, model=model)
        bij = DictToArrayBijection(step.ordering, start)
        logp, dlogp = list(map(bij.mapf, step.fs))
        H = Hamiltonian(logp, dlogp, step.potential)

        q0 = bij.map(start)
        p0 = np.ones(n) * .05
        for e in [.01, .1, 1.2]:
            for L in [1, 2, 3, 4, 20]:
                q, p = q0, p0
                q, p = leapfrog(H, q, p, L, e)
                q, p = leapfrog(H, q, -p, L, e)
                close_to(q, q0, 1e-8, str((L, e)))
                close_to(-p, p0, 1e-8, str((L, e)))
def test_leapfrog_reversible():
    n = 3
    start, model, _ = models.non_normal(n)
    with model:
        h = pm.find_hessian(start, model=model)
        step = pm.HamiltonianMC(model.vars, h, model=model)
        bij = pm.DictToArrayBijection(step.ordering, start)
        logp, dlogp = list(map(bij.mapf, step.fs))
        H = Hamiltonian(logp, dlogp, step.potential)

        q0 = bij.map(start)
        p0 = np.ones(n) * .05
        for e in [.01, .1, 1.2]:
            for L in [1, 2, 3, 4, 20]:
                q, p = q0, p0
                q, p = leapfrog(H, q, p, L, e)
                q, p = leapfrog(H, q, -p, L, e)
                close_to(q, q0, 1e-8, str((L, e)))
                close_to(-p, p0, 1e-8, str((L, e)))
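For intuition, a self-contained numerical sketch (my own addition, independent of the PyMC3 internals used in these tests): leapfrog integration is time-reversible, so integrating forward and then integrating again from the endpoint with the momentum negated returns to the starting state, which is exactly what the tests assert.

# Standalone reversibility check for a standard-normal target (illustrative).
import numpy as np

def leapfrog_steps(q, p, grad_logp, eps, n_steps):
    """Leapfrog integrator for H(q, p) = -logp(q) + p.p / 2."""
    q, p = q.copy(), p.copy()
    p += 0.5 * eps * grad_logp(q)          # initial half kick
    for _ in range(n_steps - 1):
        q += eps * p                        # full drift
        p += eps * grad_logp(q)             # full kick
    q += eps * p                            # final drift
    p += 0.5 * eps * grad_logp(q)           # final half kick
    return q, p

grad_logp = lambda q: -q                    # d/dq log N(q | 0, 1) = -q
q0, p0 = np.ones(3), 0.05 * np.ones(3)
q1, p1 = leapfrog_steps(q0, p0, grad_logp, eps=0.1, n_steps=20)
q2, p2 = leapfrog_steps(q1, -p1, grad_logp, eps=0.1, n_steps=20)
assert np.allclose(q2, q0) and np.allclose(-p2, p0)   # reversibility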
    # model equation
    sex_idx = data.sex.values
    height_mu = intercept[sex_idx] + beta[sex_idx] * data.weight
    mc.Normal('height', mu=height_mu, sd=error, observed=data.height)


# In[78]:

model.vars


# In[79]:

with model:
    start = mc.find_MAP()
    step = mc.NUTS(state=start)
    hessian = mc.find_hessian(start)
    trace = mc.sample(5000, step, start=start)


# In[80]:

fig, axes = plt.subplots(3, 2, figsize=(8, 6), squeeze=False)
mc.traceplot(trace, vars=['intercept', 'beta', 'error'], ax=axes)
fig.tight_layout()
fig.savefig("ch16-multilevel-sample-trace.pdf")
fig.savefig("ch16-multilevel-sample-trace.png")


# In[81]:

intercept_m, intercept_f = trace.get_values('intercept').mean(axis=0)


# In[82]:
import pymc3 as pm

with pm.Model() as model:
    x = pm.Normal('x', 1, 1)
    x2 = pm.Potential('x2', -x**2)

    start = model.test_point
    h = pm.find_hessian(start)
    step = pm.Metropolis(model.vars, h)


def run(n=3000):
    if n == "short":
        n = 50
    with model:
        trace = pm.sample(n, step=step, start=start)


if __name__ == '__main__':
    run()
def init_nuts(init='auto', njobs=1, n_init=500000, model=None,
              random_seed=-1, progressbar=True, **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed is extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with an identity mass matrix and then adapt a
          diagonal based on the variance of the tuning samples. All chains
          use the test value (usually the prior mean) as starting point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed in
          a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map : Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    njobs : int
        Number of parallel jobs to start.
    n_init : int
        Number of iterations of the initializer.
        If 'ADVI', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    vars = kwargs.get('vars', model.vars)
    if set(vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')

    if init is not None:
        init = init.lower()

    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    random_seed = int(np.atleast_1d(random_seed)[0])

    cb = [
        pm.callbacks.CheckParametersConvergence(
            tolerance=1e-2, diff='absolute'),
        pm.callbacks.CheckParametersConvergence(
            tolerance=1e-2, diff='relative'),
    ]

    if init == 'adapt_diag':
        start = [model.test_point] * njobs
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
        if njobs == 1:
            start = start[0]
    elif init == 'jitter+adapt_diag':
        start = []
        for _ in range(njobs):
            mean = {var: val.copy() for var, val in model.test_point.items()}
            for val in mean.values():
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(mean)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
        if njobs == 1:
            start = start[0]
    elif init == 'advi+adapt_diag_grad':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdaptGrad(
            model.ndim, mean, cov, weight)
        if njobs == 1:
            start = start[0]
    elif init == 'advi+adapt_diag':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, cov, weight)
        if njobs == 1:
            start = start[0]
    elif init == 'advi':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window
        )  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        potential = quadpotential.QuadPotentialDiag(cov)
        if njobs == 1:
            start = start[0]
    elif init == 'advi_map':
        start = pm.find_MAP()
        approx = pm.MeanField(model=model, start=start)
        pm.fit(
            random_seed=random_seed,
            n=n_init, method=pm.KLqp(approx),
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window
        )
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        potential = quadpotential.QuadPotentialDiag(cov)
        if njobs == 1:
            start = start[0]
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
        start = [start] * njobs
        potential = quadpotential.QuadPotentialFull(cov)
        if njobs == 1:
            start = start[0]
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init, step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, njobs))
        potential = quadpotential.QuadPotentialFull(cov)
        if njobs == 1:
            start = start[0]
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(potential=potential, **kwargs)

    return start, step
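A minimal sketch of how this version would typically be driven (my own addition; the toy model, chain count, and draw counts are illustrative assumptions):

# Hypothetical driver code; assumes pm.sample accepts one start point per job.
import numpy as np
import pymc3 as pm

with pm.Model() as toy_model:
    mu = pm.Normal('mu', mu=0., sd=1.)
    pm.Normal('obs', mu=mu, sd=1., observed=np.random.randn(20))

    # One start point per job plus a NUTS step carrying the adapted potential.
    start, step = init_nuts(init='jitter+adapt_diag', njobs=2)
    trace = pm.sample(500, step=step, start=start, njobs=2)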
import pymc3 as pm

with pm.Model() as model:
    x = pm.Normal('x', 1, 1)
    x2 = pm.Potential('x2', -x ** 2)

    start = model.test_point
    h = pm.find_hessian(start)
    step = pm.Metropolis(model.vars, h)


def run(n=3000):
    if n == "short":
        n = 50
    with model:
        pm.sample(n, step=step, start=start)


if __name__ == '__main__':
    run()
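For orientation, a small check I've added (not in the source): multiplying the N(1, 1) prior density by the potential factor exp(-x**2) gives a log-density of -(x - 1)**2 / 2 - x**2, which is, up to a constant, a normal with mean 1/3 and variance 1/3, so draws from the script above should concentrate near 0.33.

# Sanity check of the posterior implied by the model above (my addition):
# exp(-(x - 1)**2 / 2) * exp(-x**2) is proportional to N(mean=1/3, var=1/3).
import numpy as np

with model:
    trace = pm.sample(5000, step=step, start=start)

print(trace['x'].mean(), "should be near", 1 / 3)
print(trace['x'].var(), "should be near", 1 / 3)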
def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None,
              random_seed=-1, progressbar=True, **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed is
    extremely dependent on the choice of mass/scaling matrix. In our
    experience, using ADVI to estimate a diagonal covariance matrix and using
    this as the scaling matrix produces robust results over a wide class of
    continuous models.

    Parameters
    ----------
    init : str {'ADVI', 'ADVI_MAP', 'MAP', 'NUTS'}
        Initialization method to use.
        * ADVI : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * ADVI_MAP : Initialize ADVI with MAP and use MAP as starting point.
        * MAP : Use the MAP as starting point.
        * NUTS : Run NUTS and estimate posterior mean and covariance matrix.
    njobs : int
        Number of parallel jobs to start.
    n_init : int
        Number of iterations of the initializer.
        If 'ADVI', number of iterations, if 'NUTS', number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    random_seed = int(np.atleast_1d(random_seed)[0])

    if init is not None:
        init = init.lower()

    cb = [
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='absolute'),
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='relative'),
    ]

    if init == 'advi':
        approx = pm.fit(random_seed=random_seed,
                        n=n_init, method='advi', model=model,
                        callbacks=cb,
                        progressbar=progressbar,
                        obj_optimizer=pm.adagrad_window)  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        stds = approx.gbij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        if njobs == 1:
            start = start[0]
    elif init == 'advi_map':
        start = pm.find_MAP()
        approx = pm.MeanField(model=model, start=start)
        pm.fit(random_seed=random_seed,
               n=n_init, method=pm.ADVI.from_mean_field(approx),
               callbacks=cb,
               progressbar=progressbar,
               obj_optimizer=pm.adagrad_window)
        start = approx.sample(draws=njobs)
        stds = approx.gbij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        if njobs == 1:
            start = start[0]
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init, step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = np.random.choice(init_trace, njobs)
        if njobs == 1:
            start = start[0]
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None,
              random_seed=-1, **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed is
    extremely dependent on the choice of mass/scaling matrix. In our
    experience, using ADVI to estimate a diagonal covariance matrix and using
    this as the scaling matrix produces robust results over a wide class of
    continuous models.

    Parameters
    ----------
    init : str {'ADVI', 'ADVI_MAP', 'MAP', 'NUTS'}
        Initialization method to use.
        * ADVI : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * ADVI_MAP : Initialize ADVI with MAP and use MAP as starting point.
        * MAP : Use the MAP as starting point.
        * NUTS : Run NUTS and estimate posterior mean and covariance matrix.
    njobs : int
        Number of parallel jobs to start.
    n_init : int
        Number of iterations of the initializer.
        If 'ADVI', number of iterations, if 'NUTS', number of draws.
    model : Model (optional if in `with` context)
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    random_seed = int(np.atleast_1d(random_seed)[0])

    if init is not None:
        init = init.lower()

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init, random_seed=random_seed)
        start = pm.variational.sample_vp(v_params, njobs, progressbar=False,
                                         hide_transformed=False,
                                         random_seed=random_seed)
        if njobs == 1:
            start = start[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init, start=start,
                                       random_seed=random_seed)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(), draws=n_init,
                               random_seed=random_seed)[n_init // 2:]
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = np.random.choice(init_trace, njobs)
        if njobs == 1:
            start = start[0]
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
sf = dx_grid / dx_exact  # Jacobian scale factor

plt.figure()
#plt.stem(grid, posterior, use_line_collection=True)
plt.bar(grid, posterior, width=1 / n, alpha=0.2)
plt.plot(xs, post_exact * sf)
plt.title('grid approximation')
plt.yticks([])
plt.xlabel('θ')
plt.savefig('../figures/bb_grid.pdf')


# Laplace
with pm.Model() as normal_approximation:
    theta = pm.Beta('theta', 1., 1.)
    y = pm.Binomial('y', n=1, p=theta, observed=data)  # Bernoulli
    mean_q = pm.find_MAP()
    std_q = ((1 / pm.find_hessian(mean_q, vars=[theta])) ** 0.5)[0]

mu = mean_q['theta']
print([mu, std_q])

plt.figure()
plt.plot(xs, stats.norm.pdf(xs, mu, std_q), '--', label='Laplace')
post_exact = stats.beta.pdf(xs, h + 1, t + 1)
plt.plot(xs, post_exact, label='exact')
plt.title('Quadratic approximation')
plt.xlabel('θ', fontsize=14)
plt.yticks([])
plt.legend()
plt.savefig('../figures/bb_laplace.pdf')


# HMC
def init_nuts(init='auto', chains=1, n_init=500000, model=None,
              random_seed=None, progressbar=True, **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed is extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with an identity mass matrix and then adapt a
          diagonal based on the variance of the tuning samples. All chains
          use the test value (usually the prior mean) as starting point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed in
          a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map : Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    chains : int
        Number of jobs to start.
    n_init : int
        Number of iterations of the initializer.
        If 'ADVI', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    vars = kwargs.get('vars', model.vars)
    if set(vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')

    if init is not None:
        init = init.lower()

    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if random_seed is not None:
        random_seed = int(np.atleast_1d(random_seed)[0])
        np.random.seed(random_seed)

    cb = [
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='absolute'),
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='relative'),
    ]

    if init == 'adapt_diag':
        start = [model.test_point] * chains
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'jitter+adapt_diag':
        start = []
        for _ in range(chains):
            mean = {var: val.copy() for var, val in model.test_point.items()}
            for val in mean.values():
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(mean)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'advi+adapt_diag_grad':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdaptGrad(
            model.ndim, mean, cov, weight)
    elif init == 'advi+adapt_diag':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, cov, weight)
    elif init == 'advi':
        approx = pm.fit(random_seed=random_seed,
                        n=n_init, method='advi', model=model,
                        callbacks=cb,
                        progressbar=progressbar,
                        obj_optimizer=pm.adagrad_window)  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'advi_map':
        start = pm.find_MAP(include_transformed=True)
        approx = pm.MeanField(model=model, start=start)
        pm.fit(random_seed=random_seed,
               n=n_init, method=pm.KLqp(approx),
               callbacks=cb,
               progressbar=progressbar,
               obj_optimizer=pm.adagrad_window)
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'map':
        start = pm.find_MAP(include_transformed=True)
        cov = pm.find_hessian(point=start)
        start = [start] * chains
        potential = quadpotential.QuadPotentialFull(cov)
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init, step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, chains))
        potential = quadpotential.QuadPotentialFull(cov)
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(potential=potential, **kwargs)

    return start, step
def init_nuts(init='advi', n_init=500000, model=None):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed is
    extremely dependent on the choice of mass/scaling matrix. In our
    experience, using ADVI to estimate a diagonal covariance matrix and using
    this as the scaling matrix produces robust results over a wide class of
    continuous models.

    Parameters
    ----------
    init : str {'advi', 'advi_map', 'map', 'nuts'}
        Initialization method to use.
        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * advi_map : Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point.
        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
    n_init : int
        Number of iterations of the initializer.
        If 'advi', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init)
        start = pm.variational.sample_vp(v_params, 1, progressbar=False)[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init, start=start)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
        cov = pm.trace_cov(init_trace[n_init // 2:])
        start = {
            varname: np.mean(init_trace[varname])
            for varname in init_trace.varnames
        }
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True)

    return start, step
_m.x = np.linspace(-0.1, 1.1, 100)
_m.f_x = 2 * _m.x**2 * (1 - _m.x)**2
plt.plot(_m.x, _m.f_x)

# As the analytical solution shows, there are three points where the derivative
# is zero: $x=0$, $x=1$, and $x=\frac{1}{2}$. The value $x=\frac{1}{2}$
# maximizes the function, and is the answer.

# # 2.6

# +
_26 = Object()
_26.data = np.repeat((0, 1), (3, 6))
with pm.Model() as _26.na:
    _26.p = pm.Uniform('p', 0, 1)
    _26.w = pm.Binomial('w', n=len(_26.data), p=_26.p, observed=_26.data.sum())
    _26.mean_p = pm.find_MAP()
    _26.std_q = ((1 / pm.find_hessian(_26.mean_p, vars=[_26.p]))**0.5)[0]

_26.mean_p['p'], _26.std_q
# -

# Assuming the posterior is Gaussian, it's maximized at $0.67$ and its
# standard deviation is $0.16$.

# 89% confidence interval:
_26.norm_dist = stats.norm(_26.mean_p['p'], _26.std_q)
_26.z = stats.norm.ppf([(1 - .89) / 2, 1 - (1 - 0.89) / 2])
print("89% confidence interval:", _26.mean_p['p'] + _26.std_q * _26.z)

# # Medium

# ## 2M1