def get_step_for_trace(
    self,
    trace=None,
    model=None,
    regular_window=0,
    regular_variance=1e-3,
    **kwargs,
):
    """Get a PyMC3 NUTS step tuned for a given burn-in trace.

    Args:
        trace: The ``MultiTrace`` output from a previous run of
            ``pymc3.sample``. If ``None``, a trivial identity metric is
            used instead.
        model: The PyMC3 model (optional if called inside a
            ``with model:`` context).
        regular_window: The weight (in units of number of steps) to use
            when regularizing the mass matrix estimate.
        regular_variance: The amplitude of the regularization for the
            mass matrix. This will be added to the diagonal of the
            covariance matrix with weight given by ``regular_window``.
        **kwargs: Extra keyword arguments forwarded to ``pymc3.NUTS``.

    Returns:
        A ``pymc3.NUTS`` step method whose potential (mass matrix) is
        estimated from the samples in ``trace``.
    """
    model = pm.modelcontext(model)

    # Without a burn-in trace (or for a 1-D model) fall back to the
    # trivial identity metric.
    if trace is None or model.ndim == 1:
        potential = quad.QuadPotentialDiag(np.ones(model.ndim))
    else:
        # Map every sample from every chain into the transformed
        # parameter space. Using the actual number of mapped points as N
        # (rather than len(trace) * trace.nchains) keeps the estimate
        # correct even if the chains are not all the same length.
        samples = np.array([
            model.bijection.map(point)
            for chain in trace._straces.values()
            for point in chain
        ])
        N = len(samples)

        if self.dense:
            # Regularized sample covariance: shrink towards
            # regular_variance * I with weight regular_window.
            cov = np.cov(samples, rowvar=False)
            if regular_window > 0:
                cov = cov * N / (N + regular_window)
                cov[np.diag_indices_from(cov)] += (
                    regular_variance * regular_window / (N + regular_window))
            potential = quad.QuadPotentialFull(cov)
        else:
            # Diagonal metric: per-parameter variances with the same
            # shrinkage applied elementwise.
            var = np.var(samples, axis=0)
            if regular_window > 0:
                var = var * N / (N + regular_window)
                var += (
                    regular_variance * regular_window / (N + regular_window))
            potential = quad.QuadPotentialDiag(var)

    return pm.NUTS(potential=potential, **kwargs)
def test_random_dense():
    """Draws from dense-matrix potentials should have covariance inv(cov)."""
    np.random.seed(42)
    for _trial in range(3):
        # Construct a random symmetric positive-definite covariance matrix.
        cov = np.random.rand(5, 5)
        cov = cov + cov.T + 10 * np.eye(5)
        inv = np.linalg.inv(cov)
        assert np.allclose(inv.dot(cov), np.eye(5))

        potentials = [
            quadpotential.QuadPotentialFull(cov),
            quadpotential.QuadPotentialFullInv(inv),
        ]
        if quadpotential.chol_available:
            sparse_cov = scipy.sparse.csc_matrix(cov)
            potentials.append(quadpotential.QuadPotential_Sparse(sparse_cov))

        for potential in potentials:
            draws = np.array([potential.random() for _ in range(1000)])
            empirical = np.cov(draws.T)
            assert np.allclose(empirical, inv, atol=0.1)
# NOTE(review): flat script fragment — runs one pilot MCMC to tune a NUTS
# potential and step size, then reuses them across N independent runs. The
# fragment appears truncated: `times_consumed` is never appended to and
# `time_zero` is never read, so the sampling/timing part of the loop body is
# presumably missing — confirm against the original script.

times_consumed = []  # intended to collect per-run wall-clock times (never filled in this fragment)
N = 100  # number of repeated runs
# Synthetic parameter draws; `norm`/`halfnorm` are presumably
# scipy.stats distributions — confirm the file's imports.
alphas = norm.rvs(size=N)
betas = halfnorm.rvs(size=(N, 2))
sigmas = halfnorm.rvs(size=N)

print("=========================================")
print(f"Tuning NUTS")
print("=========================================")

# Pilot run: sample once to estimate the posterior covariance, starting
# points, and a tuned step size. NOTE(review): `pm.sample` here relies on an
# enclosing model context defined elsewhere — verify.
n_chains = 4
init_trace = pm.sample(draws=1000, tune=1000, cores=n_chains)
cov = np.atleast_1d(pm.trace_cov(init_trace))
start = list(np.random.choice(init_trace, n_chains))
potential = quadpotential.QuadPotentialFull(cov)
# Last averaged step size from the pilot run, rescaled by dimension
# (pm.NUTS divides step_scale by size**0.25 internally, so this presumably
# recovers the raw step size — verify against pymc3).
step_size = init_trace.get_sampler_stats("step_size_bar")[-1]
size = m.bijection.ordering.size  # `m` is a model defined elsewhere — TODO confirm
step_scale = step_size * (size**0.25)
# with pm.Model() as model_new:  # reset model. If you use theano.shared you can also update the value of model1 above
for i in range(N):
    # No-U-Turn Sampler NUTS
    print("=========================================")
    print(f"Turn {i}")
    print("=========================================")
    # start = {"alpha": alphas[i], "beta": betas[i], "sigma": sigmas[i]}
    time_zero = default_timer()  # start of timing (elapsed time never computed in this fragment)
    step = pm.NUTS(potential=potential, adapt_step_size=False, step_scale=step_scale)
def init_nuts(init='auto', chains=1, n_init=500000, model=None,
              random_seed=None, progressbar=True, **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed is extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with a identity mass matrix and then adapt
          a diagonal based on the variance of the tuning samples. All
          chains use the test value (usually the prior mean) as starting
          point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed in
          a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    chains : int
        Number of jobs to start.
    n_init : int
        Number of iterations of initializer
        If 'ADVI', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object

    Raises
    ------
    ValueError
        If ``vars`` does not cover all model variables, or the model has
        non-continuous variables.
    TypeError
        If ``init`` is not a string.
    NotImplementedError
        If ``init`` names an unknown initializer.
    """
    model = pm.modelcontext(model)

    # `model_vars` (not `vars`, which shadows the builtin) — NUTS requires
    # initialization over every continuous variable of the model.
    model_vars = kwargs.get('vars', model.vars)
    if set(model_vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(model_vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')
    # `init` cannot be None past the isinstance check, so normalize
    # unconditionally.
    init = init.lower()
    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if random_seed is not None:
        random_seed = int(np.atleast_1d(random_seed)[0])
        np.random.seed(random_seed)

    cb = [
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='absolute'),
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='relative'),
    ]

    def _fit_advi():
        # Shared ADVI fit used by the 'advi', 'advi+adapt_diag' and
        # 'advi+adapt_diag_grad' initializers.
        return pm.fit(
            random_seed=random_seed,
            n=n_init,
            method='advi',
            model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField

    def _start_and_cov(approx):
        # Per-chain start points sampled from the fitted approximation,
        # plus a diagonal covariance built from its fitted std devs.
        start = list(approx.sample(draws=chains))
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        return start, cov

    if init == 'adapt_diag':
        # Identity metric, adapted during tuning; all chains start at the
        # test point.
        start = [model.test_point] * chains
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'jitter+adapt_diag':
        # Same as adapt_diag, but each chain's start point gets uniform
        # jitter in [-1, 1).
        start = []
        for _ in range(chains):
            point = {var: val.copy()
                     for var, val in model.test_point.items()}
            for val in point.values():
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(point)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init in ('advi+adapt_diag', 'advi+adapt_diag_grad'):
        approx = _fit_advi()
        start, cov = _start_and_cov(approx)
        mean = model.dict_to_array(approx.bij.rmap(approx.mean.get_value()))
        weight = 50
        if init == 'advi+adapt_diag_grad':
            # Experimental: adapt using gradient variances during tuning.
            potential = quadpotential.QuadPotentialDiagAdaptGrad(
                model.ndim, mean, cov, weight)
        else:
            potential = quadpotential.QuadPotentialDiagAdapt(
                model.ndim, mean, cov, weight)
    elif init == 'advi':
        approx = _fit_advi()
        start, cov = _start_and_cov(approx)
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'advi_map':
        # Seed ADVI at the MAP estimate, then fit with KLqp.
        start = pm.find_MAP(include_transformed=True)
        approx = pm.MeanField(model=model, start=start)
        pm.fit(random_seed=random_seed, n=n_init, method=pm.KLqp(approx),
               callbacks=cb, progressbar=progressbar,
               obj_optimizer=pm.adagrad_window)
        start, cov = _start_and_cov(approx)
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'map':
        # Discouraged: full Hessian at the MAP as the mass matrix.
        start = pm.find_MAP(include_transformed=True)
        cov = pm.find_hessian(point=start)
        start = [start] * chains
        potential = quadpotential.QuadPotentialFull(cov)
    elif init == 'nuts':
        # Pilot NUTS run; mass matrix from the pilot trace covariance.
        init_trace = pm.sample(draws=n_init, step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, chains))
        potential = quadpotential.QuadPotentialFull(cov)
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(potential=potential, **kwargs)

    return start, step