def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, model=None, blocked=True, potential=None, integrator="leapfrog", dtype=None, **theano_kwargs): """Set up Hamiltonian samplers with common structures. Parameters ---------- vars : list of theano variables scaling : array_like, ndim = {1,2} Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. step_scale : float, default=0.25 Size of steps to take, automatically scaled down by 1/n**(1/4) is_cov : bool, default=False Treat scaling as a covariance matrix/vector if True, else treat it as a precision matrix/vector model : pymc3 Model instance blocked: bool, default=True potential : Potential, optional An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. **theano_kwargs: passed to theano functions """ model = modelcontext(model) if vars is None: vars = model.cont_vars vars = inputvars(vars) super(BaseHMC, self).__init__(vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs) size = self._logp_dlogp_func.size if scaling is None and potential is None: mean = floatX(np.zeros(size)) var = floatX(np.ones(size)) potential = QuadPotentialDiagAdapt(size, mean, var, 10) if isinstance(scaling, dict): point = Point(scaling, model=model) scaling = guess_scaling(point, model=model, vars=vars) if scaling is not None and potential is not None: raise ValueError("Can not specify both potential and scaling.") self.step_size = step_scale / (size ** 0.25) if potential is not None: self.potential = potential else: self.potential = quad_potential(scaling, is_cov) self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func)
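# Worked example of the step-size scaling used above (illustrative, not from the
# original source): with the default step_scale = 0.25 and a flattened parameter
# size of n = 16, the initial step is
#     step_size = 0.25 / 16 ** 0.25 = 0.25 / 2.0 = 0.125
# so larger models start with proportionally smaller leapfrog steps.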
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, model=None, blocked=True, use_single_leapfrog=False, potential=None, integrator="leapfrog", **theano_kwargs): """Superclass to implement Hamiltonian/hybrid monte carlo Parameters ---------- vars : list of theano variables scaling : array_like, ndim = {1,2} Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. step_scale : float, default=0.25 Size of steps to take, automatically scaled down by 1/n**(1/4) is_cov : bool, default=False Treat scaling as a covariance matrix/vector if True, else treat it as a precision matrix/vector model : pymc3 Model instance. default=Context model blocked: Boolean, default True use_single_leapfrog: Boolean, will leapfrog steps take a single step at a time. default False. potential : Potential, optional An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. **theano_kwargs: passed to theano functions """ model = modelcontext(model) if vars is None: vars = model.cont_vars vars = inputvars(vars) if scaling is None and potential is None: scaling = model.test_point if isinstance(scaling, dict): scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars) if scaling is not None and potential is not None: raise ValueError("Can not specify both potential and scaling.") self.step_size = step_scale / (model.ndim ** 0.25) if potential is not None: self.potential = potential else: self.potential = quad_potential(scaling, is_cov, as_cov=False) shared = make_shared_replacements(vars, model) if theano_kwargs is None: theano_kwargs = {} self.H, self.compute_energy, self.compute_velocity, self.leapfrog, self.dlogp = get_theano_hamiltonian_functions( vars, shared, model.logpt, self.potential, use_single_leapfrog, integrator, **theano_kwargs) super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
def setup_default_model(n_planets, datasets, min_period=None, max_period=None, min_amp=None, max_amp=None, circular=True, trend_order=0, model=None): model = modelcontext(model) if not isinstance(datasets, collections.abc.Iterable): datasets = [datasets] x, y, yerr = [], [], [] for data in datasets: x.append(data.t) y.append(data.rv) if data.rverr is not None: yerr.append(data.rverr) x = np.concatenate(x) y = np.concatenate(y) if len(yerr): yerr = np.concatenate(yerr) if len(yerr) != len(x): yerr = None else: yerr = None if min_period is None: min_period = np.mean(np.diff(np.sort(x))) if max_period is None: max_period = 0.5 * (x.max() - x.min()) if min_amp is None: if yerr is None: min_amp = 0.001 * np.std(y) else: min_amp = 0.01 * np.min(yerr) if max_amp is None: max_amp = 1.5 * (y.max() - y.min()) peaks = find_peaks(n_planets, x, y, yerr, min_period=min_period, max_period=max_period) with model: planets = [] for peak, name in zip(peaks, string.ascii_lowercase[1:]): logP = pm.Uniform(name + ":logP", lower=np.log(min_period), upper=np.log(max_period), testval=np.log(peak["period"])) logK = pm.Uniform(name + ":logK", lower=np.log(min_amp), upper=np.log(max_amp), testval=np.log( np.clip(peak["amp"], min_amp + 1e-2, max_amp - 1e-2))) eccen = None if not circular: eccen = pm.Beta(name + ":eccen", alpha=0.867, beta=3.03, testval=0.001) planets.append( RVPlanet(name, logP, logK, phi=peak["phase"], eccen=eccen)) if len(planets) > 1: pm.Potential( "order:{0}".format(name), tt.switch((planets[-2].logK < planets[-1].logK), 0.0, -np.inf)) rvmodel = RVModel("rv", datasets, planets) pm.Deterministic("logp", model.logpt) return rvmodel
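# Illustrative usage sketch (not from the original source): `data` is a placeholder
# for any RV dataset object exposing `.t`, `.rv` and `.rverr` arrays, as assumed by
# the function above.
def _example_setup_default_model(data):
    import pymc3 as pm
    with pm.Model() as model:
        # build a two-planet, eccentric model around the detected periodogram peaks
        rvmodel = setup_default_model(2, data, circular=False)
    return model, rvmodel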
def fast_sample_posterior_predictive( trace: Union[MultiTrace, Dataset, InferenceData, List[Dict[str, np.ndarray]]], samples: Optional[int] = None, model: Optional[Model] = None, var_names: Optional[List[str]] = None, keep_size: bool = False, random_seed=None, ) -> Dict[str, np.ndarray]: """Generate posterior predictive samples from a model given a trace. This is a vectorized alternative to the standard ``sample_posterior_predictive`` function. It aims to be as compatible as possible with the original API, and is significantly faster. Both posterior predictive sampling functions have some remaining issues, and we encourage users to verify agreement across the results of both functions for the time being. Parameters ---------- trace: MultiTrace, xarray.Dataset, InferenceData, or List of points (dictionary) Trace generated from MCMC sampling. samples: int, optional Number of posterior predictive samples to generate. Defaults to one posterior predictive sample per posterior sample, that is, the number of draws times the number of chains. It is not recommended to modify this value; when modified, some chains may not be represented in the posterior predictive sample. model: Model (optional if in `with` context) Model used to generate `trace` var_names: Iterable[str] List of vars to sample. keep_size: bool, optional Force posterior predictive sample to have the same shape as posterior and sample stats data: ``(nchains, ndraws, ...)``. random_seed: int Seed for the random number generator. Returns ------- samples: dict Dictionary with the variable names as keys, and values numpy arrays containing posterior predictive samples. """ ### Implementation note: primarily this function canonicalizes the arguments: ### Establishing the model context, wrangling the number of samples, ### Canonicalizing the trace argument into a _TraceDict object and fitting it ### to the requested number of samples. Then it invokes posterior_predictive_draw_values ### *repeatedly*. It does this repeatedly, because the trace argument is set up to be ### the same as the number of samples. So if the number of samples requested is ### greater than the number of samples in the trace parameter, we sample repeatedly. This ### makes the shape issues just a little easier to deal with. if isinstance(trace, InferenceData): nchains, ndraws = chains_and_samples(trace) trace = dataset_to_point_list(trace.posterior) elif isinstance(trace, Dataset): nchains, ndraws = chains_and_samples(trace) trace = dataset_to_point_list(trace) elif isinstance(trace, MultiTrace): nchains = trace.nchains ndraws = len(trace) else: if keep_size: # arguably this should be just a warning. raise IncorrectArgumentsError( "For keep_size, cannot identify chains and length from %s.", trace) model = modelcontext(model) assert model is not None with model: if keep_size and samples is not None: raise IncorrectArgumentsError( "Should not specify both keep_size and samples arguments") if isinstance(trace, list) and all(isinstance(x, dict) for x in trace): _trace = _TraceDict(point_list=trace) elif isinstance(trace, MultiTrace): _trace = _TraceDict(multi_trace=trace) else: raise TypeError( "Unable to generate posterior predictive samples from argument of type %s" % type(trace)) len_trace = len(_trace) assert isinstance(_trace, _TraceDict) _samples: List[int] = [] # temporary replacement for more complicated logic. 
max_samples: int = len_trace if samples is None or samples == max_samples: _samples = [max_samples] elif samples < max_samples: warnings.warn( "samples parameter is smaller than nchains times ndraws, some draws " "and/or chains may not be represented in the returned posterior " "predictive sample") # if this is less than the number of samples in the trace, take a slice and # work with that. _trace = _trace[slice(samples)] _samples = [samples] elif samples > max_samples: full, rem = divmod(samples, max_samples) _samples = (full * [max_samples]) + ([rem] if rem != 0 else []) else: raise IncorrectArgumentsError( "Unexpected combination of samples (%s) and max_samples (%d)" % (samples, max_samples)) if var_names is None: vars = model.observed_RVs else: vars = [model[x] for x in var_names] if random_seed is not None: np.random.seed(random_seed) if TYPE_CHECKING: _ETPParent = UserDict[str, np.ndarray] # this is only processed by mypy else: # this is not seen by mypy but will be executed at runtime. _ETPParent = UserDict class _ExtendableTrace(_ETPParent): def extend_trace(self, trace: Dict[str, np.ndarray]) -> None: for k, v in trace.items(): if k in self.data: self.data[k] = np.concatenate((self.data[k], v)) else: self.data[k] = v ppc_trace = _ExtendableTrace() for s in _samples: strace = _trace if s == len_trace else _trace[slice(0, s)] try: values = posterior_predictive_draw_values( cast(List[Any], vars), strace, s) new_trace: Dict[str, np.ndarray] = { k.name: v for (k, v) in zip(vars, values) } ppc_trace.extend_trace(new_trace) except KeyboardInterrupt: pass if keep_size: return { k: ary.reshape((nchains, ndraws, *ary.shape[1:])) for k, ary in ppc_trace.items() } # this gets us a Dict[str, np.ndarray] instead of my wrapped equiv. return ppc_trace.data
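# Illustrative usage sketch (not from the original source): `model` and `trace` are
# assumed to come from an earlier pm.sample() call on that model.
def _example_fast_ppc(model, trace):
    with model:
        ppc = fast_sample_posterior_predictive(trace, keep_size=True, random_seed=42)
    # ppc maps each observed variable name to an array of shape (nchains, ndraws, ...)
    return ppc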
def svgd(vars=None, n=5000, n_particles=100, jitter=.01, optimizer=adagrad, start=None, progressbar=True, random_seed=None, model=None): if random_seed is not None: np.random.seed(random_seed) model = modelcontext(model) if vars is None: vars = model.vars vars = pm.inputvars(vars) if start is None: start = model.test_point start = model.dict_to_array(start) # Initialize particles x0 = np.tile(start, (n_particles, 1)) x0 += np.random.normal(0, jitter, x0.shape) theta = theano.shared(x0) # Create theano svgd gradient expression and function logp_grad_vec = _make_vectorized_logp_grad(vars, model, theta) svgd_grad = -1 * _svgd_gradient(vars, model, theta, logp_grad_vec) # maximize svgd_updates = optimizer([svgd_grad], [theta], learning_rate=1e-3) i = tt.iscalar('i') svgd_step = theano.function([i], [i], updates=svgd_updates) # Run svgd optimization if progressbar: progress = tqdm(np.arange(n)) else: progress = np.arange(n) try: for ii in progress: svgd_step(ii) except KeyboardInterrupt: pass finally: if hasattr(progress, 'close'): progress.close() theta_val = theta.get_value() # Build trace strace = pm.backends.NDArray() try: strace.setup(theta_val.shape[0], 1) for p in theta_val: strace.record(model.bijection.rmap(p)) except KeyboardInterrupt: pass finally: strace.close() trace = pm.backends.base.MultiTrace([strace]) return trace
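# Illustrative usage sketch (not from the original source), on a toy model; the
# returned MultiTrace holds one record per particle.
def _example_svgd():
    import numpy as np
    import pymc3 as pm
    with pm.Model():
        mu = pm.Normal('mu', mu=0.0, sd=1.0)
        pm.Normal('obs', mu=mu, sd=1.0, observed=np.random.randn(30))
        trace = svgd(n=1000, n_particles=50, random_seed=0)
    return trace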
def find_MAP(start=None, vars=None, method="L-BFGS-B", return_raw=False, include_transformed=True, progressbar=True, maxeval=5000, model=None, *args, **kwargs): """ Finds the local maximum a posteriori point given a model. find_MAP should not be used to initialize the NUTS sampler. Simply call pymc3.sample() and it will automatically initialize NUTS in a better way. Parameters ---------- start: `dict` of parameter values (Defaults to `model.test_point`) vars: list List of variables to optimize and set to optimum (Defaults to all continuous). method: string or callable Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete variables are specified in `vars`, then `Powell` which will perform better). For instructions on use of a callable, refer to SciPy's documentation of `optimize.minimize`. return_raw: bool Whether to return the full output of scipy.optimize.minimize (Defaults to `False`) include_transformed: bool, optional defaults to True Flag for reporting automatically transformed variables in addition to original variables. progressbar: bool, optional defaults to True Whether or not to display a progress bar in the command line. maxeval: int, optional, defaults to 5000 The maximum number of times the posterior distribution is evaluated. model: Model (optional if in `with` context) *args, **kwargs Extra args passed to scipy.optimize.minimize Notes ----- Older code examples used find_MAP() to initialize the NUTS sampler, but this is not an effective way of choosing starting values for sampling. As a result, we have greatly enhanced the initialization of NUTS and wrapped it inside pymc3.sample() and you should thus avoid this method. """ model = modelcontext(model) if start is None: start = model.test_point else: update_start_vals(start, model.test_point, model) check_start_vals(start, model) if vars is None: vars = model.cont_vars vars = inputvars(vars) disc_vars = list(typefilter(vars, discrete_types)) allinmodel(vars, model) start = Point(start, model=model) bij = DictToArrayBijection(ArrayOrdering(vars), start) logp_func = bij.mapf(model.fastlogp_nojac) x0 = bij.map(start) try: dlogp_func = bij.mapf(model.fastdlogp_nojac(vars)) compute_gradient = True except (AttributeError, NotImplementedError, tg.NullTypeGradError): compute_gradient = False if disc_vars or not compute_gradient: pm._log.warning( "Warning: gradient not available. " + "(E.g. vars contains discrete variables). MAP " + "estimates may not be accurate for the default " + "parameters. Defaulting to non-gradient minimization " + "'Powell'.") method = "Powell" if "fmin" in kwargs: fmin = kwargs.pop("fmin") warnings.warn( "In future versions, set the optimization algorithm with a string. " 'For example, use `method="L-BFGS-B"` instead of ' '`fmin=sp.optimize.fmin_l_bfgs_b"`.')
cost_func = CostFuncWrapper(maxeval, progressbar, logp_func) # Check to see if minimization function actually uses the gradient if "fprime" in getargspec(fmin).args: def grad_logp(point): return nan_to_num(-dlogp_func(point)) opt_result = fmin(cost_func, x0, fprime=grad_logp, *args, **kwargs) else: # Check to see if minimization function uses a starting value if "x0" in getargspec(fmin).args: opt_result = fmin(cost_func, x0, *args, **kwargs) else: opt_result = fmin(cost_func, *args, **kwargs) if isinstance(opt_result, tuple): mx0 = opt_result[0] else: mx0 = opt_result else: # remove 'if' part, keep just this 'else' block after version change if compute_gradient: cost_func = CostFuncWrapper(maxeval, progressbar, logp_func, dlogp_func) else: cost_func = CostFuncWrapper(maxeval, progressbar, logp_func) try: opt_result = minimize(cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs) mx0 = opt_result["x"] # r -> opt_result except (KeyboardInterrupt, StopIteration) as e: mx0, opt_result = cost_func.previous_x, None if isinstance(e, StopIteration): pm._log.info(e) finally: last_v = cost_func.n_eval if progressbar: assert isinstance(cost_func.progress, ProgressBar) cost_func.progress.total = last_v cost_func.progress.update(last_v) print() vars = get_default_varnames(model.unobserved_RVs, include_transformed) mx = { var.name: value for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0))) } if return_raw: return mx, opt_result else: return mx
def __init__(self, n0=10, init_samples=None, k_trunc=np.inf, eps_z=.01, nf_iter=2, N=10, t_ess=0.5, beta_max=1, model=None, random_seed=-1, chain=0, frac_validate=0.0, iteration=None, alpha_w=(0, 0), alpha_uw=(0, 0), verbose=False, n_component=None, interp_nbin=None, KDE=True, bw_factor_min=1.0, bw_factor_max=1.0, bw_factor_num=1, rel_bw=1, edge_bins=None, ndata_wT=None, MSWD_max_iter=None, NBfirstlayer=True, logit=False, Whiten=False, trainable_qw=False, sgd_steps=0, knots_trainable=5, batchsize=None, nocuda=False, patch=False, shape=[28, 28, 1], bounds=None): self.N = N self.n0 = n0 self.model = model self.chain = chain # Init method params. self.init_samples = init_samples self.random_seed = random_seed # Set the numpy and torch seeds. if self.random_seed != -1: np.random.seed(self.random_seed) torch.manual_seed(self.random_seed) # Separating out so I can keep track. These are SINF params. assert 0.0 <= frac_validate <= 1.0 self.frac_validate = frac_validate self.iteration = iteration self.alpha_uw = alpha_uw self.alpha_w = alpha_w self.k_trunc = k_trunc self.verbose = verbose self.n_component = n_component self.interp_nbin = interp_nbin self.KDE = KDE self.bw_factors = np.linspace(bw_factor_min, bw_factor_max, bw_factor_num) self.edge_bins = edge_bins self.ndata_wT = ndata_wT self.MSWD_max_iter = MSWD_max_iter self.NBfirstlayer = NBfirstlayer self.logit = logit self.Whiten = Whiten self.batchsize = batchsize self.nocuda = nocuda self.patch = patch self.shape = shape # Convert the array of bounds passed in as [[x1min,x2min,...],[x1max,x2max,...]] to what SINF wants, [[x1min,x1max],[x2min,x2max],...] if (bounds is not None): bounds_sinf = [list(b) for b in bounds.T] else: bounds_sinf = [ [None, None] for i in range(init_samples.shape[1]) ] # get the dimensionality from initial samples assuming (N,d) shape self.bounds = bounds_sinf # trainable sinf self.trainable_qw = trainable_qw self.sgd_steps = sgd_steps self.knots_trainable = knots_trainable # nfo self.t_ess = t_ess self.beta_max = beta_max self.beta = 0 # initial value of beta before iterating, to match smc self.rel_bw = rel_bw self.model = modelcontext(model) self.variables = inputvars(self.model.vars)
def __init__( self, vars=None, batch_size=None, total_size=None, step_size=1.0, model=None, random_seed=None, minibatches=None, minibatch_tensors=None, **kwargs ): warnings.warn(EXPERIMENTAL_WARNING) model = modelcontext(model) if vars is None: vars = model.vars vars = inputvars(vars) self.model = model self.vars = vars self.batch_size = batch_size self.total_size = total_size _value_error( total_size is not None and batch_size is not None, "total_size and batch_size of training data have to be specified", ) self.expected_iter = int(total_size / batch_size) # set random stream self.random = None if random_seed is None: self.random = at_rng() else: self.random = at_rng(random_seed) self.step_size = step_size shared = make_shared_replacements(vars, model) self.updates = OrderedDict() self.q_size = int(sum(v.dsize for v in self.vars)) flat_view = model.flatten(vars) self.inarray = [flat_view.input] self.dlog_prior = prior_dlogp(vars, model, flat_view) self.dlogp_elemwise = elemwise_dlogL(vars, model, flat_view) if minibatch_tensors is not None: _check_minibatches(minibatch_tensors, minibatches) self.minibatches = minibatches # Replace input shared variables with tensors def is_shared(t): return isinstance(t, aesara.compile.sharedvalue.SharedVariable) tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors] updates = OrderedDict( {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)} ) self.minibatch_tensors = tensors self.inarray += self.minibatch_tensors self.updates.update(updates) self._initialize_values() super().__init__(vars, shared)
def get_dense_nuts_step( start=None, adaptation_window=101, doubling=True, initial_weight=10, use_hessian=False, use_hessian_diag=False, hessian_regularization=1e-8, model=None, **kwargs, ): """Get a NUTS step function with a dense mass matrix The entries in the mass matrix will be tuned based on the sample covariances during tuning. All extra arguments are passed directly to ``pymc3.NUTS``. Args: start (dict, optional): A starting point in parameter space. If not provided, the model's ``test_point`` is used. adaptation_window (int, optional): The (initial) size of the window used for sample covariance estimation. doubling (bool, optional): If ``True`` (default) the adaptation window is doubled each time the matrix is updated. """ model = modelcontext(model) if not all_continuous(model.vars): raise ValueError("NUTS can only be used for models with only " "continuous variables.") if start is None: start = model.test_point mean = model.dict_to_array(start) if use_hessian or use_hessian_diag: try: import numdifftools as nd except ImportError: raise ImportError( "The 'numdifftools' package is required for Hessian " "computations") logger.info("Numerically estimating Hessian matrix") if use_hessian_diag: hess = nd.Hessdiag(model.logp_array)(mean) var = np.diag(-1.0 / hess) else: hess = nd.Hessian(model.logp_array)(mean) var = -np.linalg.inv(hess) factor = 1 success = False while not success: var[np.diag_indices_from(var)] += factor * hessian_regularization try: np.linalg.cholesky(var) except np.linalg.LinAlgError: factor *= 2 else: success = True else: var = np.eye(len(mean)) potential = QuadPotentialDenseAdapt( model.ndim, mean, var, initial_weight, adaptation_window=adaptation_window, doubling=doubling, ) return pm.NUTS(potential=potential, model=model, **kwargs)
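# Illustrative usage sketch (not from the original source): extra keyword arguments
# such as `target_accept` are forwarded to pymc3.NUTS.
def _example_dense_nuts(model):
    import pymc3 as pm
    with model:
        step = get_dense_nuts_step(adaptation_window=50, target_accept=0.9)
        trace = pm.sample(draws=1000, tune=1000, step=step)
    return trace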
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, model=None, blocked=True, use_single_leapfrog=False, potential=None, integrator="leapfrog", **theano_kwargs): """Superclass to implement Hamiltonian/hybrid monte carlo Parameters ---------- vars : list of theano variables scaling : array_like, ndim = {1,2} Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. step_scale : float, default=0.25 Size of steps to take, automatically scaled down by 1/n**(1/4) is_cov : bool, default=False Treat scaling as a covariance matrix/vector if True, else treat it as a precision matrix/vector model : pymc3 Model instance. default=Context model blocked: Boolean, default True use_single_leapfrog: Boolean, will leapfrog steps take a single step at a time. default False. potential : Potential, optional An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. **theano_kwargs: passed to theano functions """ model = modelcontext(model) if vars is None: vars = model.cont_vars vars = inputvars(vars) if scaling is None and potential is None: scaling = model.test_point if isinstance(scaling, dict): scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars) if scaling is not None and potential is not None: raise ValueError("Can not specify both potential and scaling.") self.step_size = step_scale / (model.ndim**0.25) if potential is not None: self.potential = potential else: self.potential = quad_potential(scaling, is_cov, as_cov=False) shared = make_shared_replacements(vars, model) if theano_kwargs is None: theano_kwargs = {} self.H, self.compute_energy, self.leapfrog, self.dlogp = get_theano_hamiltonian_functions( vars, shared, model.logpt, self.potential, use_single_leapfrog, integrator, **theano_kwargs) super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
def loo(trace, model=None, reff=None, progressbar=False): """Calculates leave-one-out (LOO) cross-validation for out of sample predictive model fit, following Vehtari et al. (2015). Cross-validation is computed using Pareto-smoothed importance sampling (PSIS). Parameters ---------- trace : result of MCMC run model : PyMC Model Optional model. Default None, taken from context. reff : float relative MCMC efficiency, `effective_n / N` i.e. number of effective samples divided by the number of actual samples. Computed from trace by default. progressbar: bool Whether or not to display a progress bar in the command line. The bar shows the percentage of completion, the evaluation speed, and the estimated time to completion Returns ------- df_loo: pandas.DataFrame Estimation and standard error of `elpd_loo`, `p_loo`, and `looic` pointwise: dict point-wise value of `elpd_loo`, `p_loo`, `looic` and pareto shape `k` """ model = modelcontext(model) if reff is None: if trace.nchains == 1: reff = 1. else: eff = effective_n(trace) eff_ave = pmstat.dict2pd(eff, 'eff').mean() samples = len(trace) * trace.nchains reff = eff_ave / samples log_py = pmstat._log_post_trace(trace, model, progressbar=progressbar) if log_py.size == 0: raise ValueError('The model does not contain observed values.') shape_str = ' by '.join(map(str, log_py.shape)) print('Computed from ' + shape_str + ' log-likelihood matrix') lw, ks = pmstat._psislw(-log_py, reff) lw += log_py elpd_loo_i = logsumexp(lw, axis=0) elpd_loo = elpd_loo_i.sum() elpd_loo_se = (len(elpd_loo_i) * np.var(elpd_loo_i)) ** 0.5 loo_lppd_i = - 2 * elpd_loo_i loo_lppd = loo_lppd_i.sum() loo_lppd_se = (len(loo_lppd_i) * np.var(loo_lppd_i)) ** 0.5 lppd_i = logsumexp(log_py, axis=0, b=1. / log_py.shape[0]) p_loo_i = lppd_i - elpd_loo_i p_loo = p_loo_i.sum() p_loo_se = (len(p_loo_i) * np.var(p_loo_i)) ** 0.5 df_loo = (pd.DataFrame(dict(Estimate=[elpd_loo, p_loo, loo_lppd], SE=[elpd_loo_se, p_loo_se, loo_lppd_se])) .rename(index={0: 'elpd_loo', 1: 'p_loo', 2: 'looic'})) pointwise = dict(elpd_loo=elpd_loo_i, p_loo=p_loo_i, looic=loo_lppd_i, ks=ks) return df_loo, pointwise
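# Illustrative usage sketch (not from the original source): `trace` is assumed to be
# a MultiTrace from pm.sample() on `model`, which must contain observed variables.
def _example_loo(model, trace):
    df_loo, pointwise = loo(trace, model=model, progressbar=False)
    # df_loo holds elpd_loo, p_loo and looic with standard errors;
    # pointwise['ks'] holds the per-observation Pareto shape estimates.
    return df_loo, pointwise['ks']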
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, model=None, blocked=True, potential=None, integrator="leapfrog", dtype=None, **theano_kwargs): """Set up Hamiltonian samplers with common structures. Parameters ---------- vars : list of theano variables scaling : array_like, ndim = {1,2} Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. step_scale : float, default=0.25 Size of steps to take, automatically scaled down by 1/n**(1/4) is_cov : bool, default=False Treat scaling as a covariance matrix/vector if True, else treat it as a precision matrix/vector model : pymc3 Model instance blocked: bool, default=True potential : Potential, optional An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. **theano_kwargs: passed to theano functions """ model = modelcontext(model) if vars is None: vars = model.cont_vars vars = inputvars(vars) super(BaseHMC, self).__init__(vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs) size = self._logp_dlogp_func.size if scaling is None and potential is None: mean = floatX(np.zeros(size)) var = floatX(np.ones(size)) potential = QuadPotentialDiagAdapt(size, mean, var, 10) if isinstance(scaling, dict): point = Point(scaling, model=model) scaling = guess_scaling(point, model=model, vars=vars) if scaling is not None and potential is not None: raise ValueError("Can not specify both potential and scaling.") self.step_size = step_scale / (size**0.25) if potential is not None: self.potential = potential else: self.potential = quad_potential(scaling, is_cov) self.integrator = integration.CpuLeapfrogIntegrator( size, self.potential, self._logp_dlogp_func)
def sample_nf_smc( draws=2000, start=None, threshold=0.5, frac_validate=0.1, iteration=5, alpha=(0, 0), k_trunc=0.25, pareto=False, epsilon=1e-3, local_thresh=3, local_step_size=0.1, local_grad=True, nf_local_iter=0, max_line_search=2, verbose=False, n_component=None, interp_nbin=None, KDE=True, bw_factor=0.5, edge_bins=None, ndata_wT=None, MSWD_max_iter=None, NBfirstlayer=True, logit=False, Whiten=False, batchsize=None, nocuda=False, patch=False, shape=[28, 28, 1], model=None, random_seed=-1, parallel=False, chains=None, cores=None, ): r""" Sequential Monte Carlo based sampling. Parameters ---------- draws: int The number of samples to draw from the posterior (i.e. last stage). And also the number of independent chains. Defaults to 2000. start: dict, or array of dict Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. threshold: float Determines the change of beta from stage to stage, i.e.indirectly the number of stages, the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. It should be between 0 and 1. model: Model (optional if in ``with`` context)). random_seed: int random seed parallel: bool Distribute computations across cores if the number of cores is larger than 1. Defaults to False. cores : int The number of chains to run in parallel. If ``None``, set to the number of CPUs in the system, but at most 4. chains : int The number of chains to sample. Running independent chains is important for some convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever is larger. Notes ----- SMC works by moving through successive stages. At each stage the inverse temperature :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0 we have the prior distribution and when :math:`\beta` =1 we have the posterior distribution. So in more general terms we are always computing samples from a tempered posterior that we can write as: .. math:: p(\theta \mid y)_{\beta} = p(y \mid \theta)^{\beta} p(\theta) A summary of the algorithm is: 1. Initialize :math:`\beta` at zero and stage at zero. 2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the tempered posterior is the prior). 3. Increase :math:`\beta` in order to make the effective sample size equals some predefined value (we use :math:`Nt`, where :math:`t` is 0.5 by default). 4. Compute a set of N importance weights W. The weights are computed as the ratio of the likelihoods of a sample at stage i+1 and stage i. 5. Obtain :math:`S_{w}` by re-sampling according to W. 6. Use W to compute the mean and covariance for the proposal distribution, a MVNormal. 7. For stages other than 0 use the acceptance rate from the previous stage to estimate `n_steps`. 8. Run N independent Metropolis-Hastings (IMH) chains (each one of length `n_steps`), starting each one from a different sample in :math:`S_{w}`. Samples are IMH as the proposal mean is the of the previous posterior stage and not the current point in parameter space. 9. Repeat from step 3 until :math:`\beta \ge 1`. 10. The final result is a collection of N samples from the posterior. References ---------- .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013), Bayesian inversion for finite fault earthquake source models I- Theory and algorithm. 
Geophysical Journal International, 2013, 194(3), pp.1701-1726, `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__ .. [Ching2007] Ching, J. and Chen, Y. (2007). Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816), 816-832. `link <http://ascelibrary.org/doi/abs/10.1061/%28ASCE%290733-9399 %282007%29133:7%28816%29>`__ """ _log = logging.getLogger("pymc3") _log.info("Initializing SMC+SINF sampler...") model = modelcontext(model) if model.name: raise NotImplementedError( "The SMC implementation currently does not support named models. " "See https://github.com/pymc-devs/pymc3/pull/4365.") if cores is None: cores = _cpu_count() if chains is None: chains = max(2, cores) elif chains == 1: cores = 1 _log.info(f"Sampling {chains} chain{'s' if chains > 1 else ''} " f"in {cores} job{'s' if cores > 1 else ''}") if random_seed == -1: random_seed = None if chains == 1 and isinstance(random_seed, int): random_seed = [random_seed] if random_seed is None or isinstance(random_seed, int): if random_seed is not None: np.random.seed(random_seed) random_seed = [np.random.randint(2**30) for _ in range(chains)] if not isinstance(random_seed, Iterable): raise TypeError( "Invalid value for `random_seed`. Must be tuple, list or int") params = ( draws, start, threshold, frac_validate, iteration, alpha, k_trunc, pareto, epsilon, local_thresh, local_step_size, local_grad, nf_local_iter, max_line_search, verbose, n_component, interp_nbin, KDE, bw_factor, edge_bins, ndata_wT, MSWD_max_iter, NBfirstlayer, logit, Whiten, batchsize, nocuda, patch, shape, model, ) t1 = time.time() if parallel and chains > 1: loggers = [_log] + [None] * (chains - 1) pool = mp.Pool(cores) results = pool.starmap(sample_nf_smc_int, [(*params, random_seed[i], i, loggers[i]) for i in range(chains)]) pool.close() pool.join() else: results = [] for i in range(chains): results.append(sample_nf_smc_int(*params, random_seed[i], i, _log)) ( traces, log_marginal_likelihood, q_samples, q_log_weights, betas, ) = zip(*results) trace = MultiTrace(traces) trace.report._n_draws = draws trace.report.log_marginal_likelihood = log_marginal_likelihood trace.report.q_samples = q_samples trace.report.q_log_weights = q_log_weights trace.report.betas = betas trace.report._t_sampling = time.time() - t1 return trace
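# Illustrative usage sketch (not from the original source): assumes the SINF-based
# normalizing-flow kernel and its dependencies are available in the environment.
def _example_sample_nf_smc(model):
    with model:
        trace = sample_nf_smc(draws=1000, threshold=0.5, chains=2, random_seed=1)
    # per-chain diagnostics are attached to the trace report
    return trace.report.log_marginal_likelihood, trace.report.betas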
def sample_nfmc(draws=500, init_draws=500, resampling_draws=500, init_ess=100, init_method='prior', init_samples=None, start=None, sample_mode='reinit', finish_regularized=False, cull_lowp_tol=0.05, init_EL2O='adam', mean_field_EL2O=False, use_hess_EL2O=False, absEL2O=1e-10, fracEL2O=1e-2, EL2O_draws=100, maxiter_EL2O=500, EL2O_optim_method='L-BFGS-B', scipy_map_method='L-BFGS-B', adam_lr=1e-3, adam_b1=0.9, adam_b2=0.999, adam_eps=1.0e-8, adam_steps=1000, simulator=None, model_data=None, sim_data_cov=None, sim_size=None, sim_params=None, sim_start=None, sim_optim_method='lbfgs', sim_tol=0.01, local_thresh=3, local_step_size=0.1, local_grad=True, init_local=True, full_local=False, nf_local_iter=3, max_line_search=100, k_trunc=0.25, norm_tol=0.01, ess_tol=0.5, optim_iter=1000, ftol=2.220446049250313e-9, gtol=1.0e-5, nf_iter=3, model=None, frac_validate=0.1, iteration=None, final_iteration=None, alpha=(0, 0), final_alpha=(0.75, 0.75), verbose=False, n_component=None, interp_nbin=None, KDE=True, bw_factor_min=0.5, bw_factor_max=2.5, bw_factor_num=11, edge_bins=None, ndata_wT=None, MSWD_max_iter=None, NBfirstlayer=True, logit=False, Whiten=False, batchsize=None, nocuda=False, patch=False, shape=[28, 28, 1], redraw=True, random_seed=-1, parallel=False, chains=None, cores=None): r""" Normalizing flow based nested sampling. Parameters ---------- draws: int The number of samples to draw from the posterior (i.e. last stage). And also the number of independent chains. Defaults to 500. start: dict, or array of dict Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. init_method: str Tells us how to initialize the NFMC fits. Default is 'prior'. If this is supplied along with init_samples we use those instead. Current options are 'prior', 'full_rank', 'lbfgs'. norm_tol: float Fractional difference in the evidence estimate between two steps. If it falls below this we stop iterating over the NF fits. optim_iter: int Maximum number of optimization steps to run during the initialization. nf_iter: int Number of NF fit iterations to go through after the optimization step. model: Model (optional if in ``with`` context)). frac_validate: float Fraction of the live points at each NS iteration that we use for validation of the NF fit. alpha: tuple of floats Regularization parameters used for the NF fit. verbose: boolean Whether you want verbose output from the NF fit. random_seed: int random seed parallel: bool Distribute computations across cores if the number of cores is larger than 1. Defaults to False. cores : int Number of cores available for the optimization step. Defaults to None, in which case the CPU count is used. chains : int The number of chains to sample. Running independent chains is important for some convergence statistics. Default is 2. """ _log = logging.getLogger("pymc3") _log.info("Initializing normalizing flow based sampling...") model = modelcontext(model) if model.name: raise NotImplementedError( "The NS_NFMC implementation currently does not support named models. " "See https://github.com/pymc-devs/pymc3/pull/4365.")
if cores is None: cores = _cpu_count() if chains is None: chains = 2 _log.info(f"Sampling {chains} chain{'s' if chains > 1 else ''} " f"Cores available for optimization: {cores}") if random_seed == -1: random_seed = None if chains == 1 and isinstance(random_seed, int): random_seed = [random_seed] if random_seed is None or isinstance(random_seed, int): if random_seed is not None: np.random.seed(random_seed) random_seed = [np.random.randint(2**30) for _ in range(chains)] if not isinstance(random_seed, Iterable): raise TypeError( "Invalid value for `random_seed`. Must be tuple, list or int") assert (sample_mode == 'reinit' or sample_mode == 'keep_local' or sample_mode == 'function_approx') params = ( draws, init_draws, resampling_draws, init_ess, init_method, init_samples, start, sample_mode, finish_regularized, cull_lowp_tol, init_EL2O, mean_field_EL2O, use_hess_EL2O, absEL2O, fracEL2O, EL2O_draws, maxiter_EL2O, EL2O_optim_method, scipy_map_method, adam_lr, adam_b1, adam_b2, adam_eps, adam_steps, simulator, model_data, sim_data_cov, sim_size, sim_params, sim_start, sim_optim_method, sim_tol, local_thresh, local_step_size, local_grad, init_local, full_local, nf_local_iter, max_line_search, k_trunc, norm_tol, ess_tol, optim_iter, ftol, gtol, nf_iter, model, frac_validate, iteration, final_iteration, alpha, final_alpha, cores, verbose, n_component, interp_nbin, KDE, bw_factor_min, bw_factor_max, bw_factor_num, edge_bins, ndata_wT, MSWD_max_iter, NBfirstlayer, logit, Whiten, batchsize, nocuda, patch, shape, redraw, parallel, ) t1 = time.time() results = [] for i in range(chains): results.append(sample_nfmc_int(*params, random_seed[i], i, _log)) (traces, log_evidence, q_samples, importance_weights, total_samples, total_weights, logp, logq, train_logp, train_logq, logZ, q_models, q_ess, train_ess, total_ess, min_var_bws, min_pq_bws) = zip(*results) trace = MultiTrace(traces) trace.report.log_evidence = log_evidence trace.report.q_samples = q_samples trace.report.importance_weights = importance_weights trace.report.total_samples = total_samples trace.report.total_weights = total_weights trace.report.logp = logp trace.report.logq = logq trace.report.train_logp = train_logp trace.report.train_logq = train_logq trace.report.logZ = logZ trace.report.q_models = q_models trace.report.q_ess = q_ess trace.report.train_ess = train_ess trace.report.total_ess = total_ess trace.report._n_draws = draws trace.report.min_var_bws = min_var_bws trace.report.min_pq_bws = min_pq_bws trace.report._t_sampling = time.time() - t1 return trace
def __init__( self, draws=2000, start=None, threshold=0.5, model=None, random_seed=-1, chain=0, frac_validate=0.1, iteration=None, alpha=(0, 0), k_trunc=0.5, pareto=False, epsilon=1e-3, local_thresh=3, local_step_size=0.1, local_grad=True, nf_local_iter=0, max_line_search=2, verbose=False, n_component=None, interp_nbin=None, KDE=True, bw_factor=0.5, edge_bins=None, ndata_wT=None, MSWD_max_iter=None, NBfirstlayer=True, logit=False, Whiten=False, batchsize=None, nocuda=False, patch=False, shape=[28, 28, 1], ): self.draws = draws self.start = start self.threshold = threshold self.model = model self.random_seed = random_seed self.chain = chain self.frac_validate = frac_validate self.iteration = iteration self.alpha = alpha self.k_trunc = k_trunc self.pareto = pareto self.epsilon = epsilon self.local_thresh = local_thresh self.local_step_size = local_step_size self.local_grad = local_grad self.nf_local_iter = nf_local_iter self.max_line_search = max_line_search self.verbose = verbose self.n_component = n_component self.interp_nbin = interp_nbin self.KDE = KDE self.bw_factor = bw_factor self.edge_bins = edge_bins self.ndata_wT = ndata_wT self.MSWD_max_iter = MSWD_max_iter self.NBfirstlayer = NBfirstlayer self.logit = logit self.Whiten = Whiten self.batchsize = batchsize self.nocuda = nocuda self.patch = patch self.shape = shape self.model = modelcontext(model) if self.random_seed != -1: np.random.seed(self.random_seed) self.beta = 0 self.variables = inputvars(self.model.vars) self.weights = np.ones(self.draws) / self.draws #self.sinf_logq = np.array([]) self.log_marginal_likelihood = 0
def Marginal_llk(mtrace, model=None, ADVI=False, trace2=None, logp=None, maxiter=1000, burn_in=1000): """The Bridge Sampling Estimator of the Marginal Likelihood. Parameters ---------- mtrace : MultiTrace, result of MCMC run model : PyMC Model Optional model. Default None, taken from context. logp : Model Log-probability function, read from the model by default maxiter : Maximum number of iterations Returns ------- marg_llk : Estimated Marginal log-Likelihood. """ r0, tol1, tol2 = 0.5, 1e-2, 1e-2 model = modelcontext(model) if logp is None: logp = model.logp_array vars = model.free_RVs len_trace = len(mtrace) if ADVI == False: nchain = mtrace.nchains N1_ = len_trace // 2 N1 = N1_ * nchain N2 = len_trace * nchain - N1 neff_list = dict() else: nchain = 2 N1_ = len_trace N1 = N1_ N2 = len_trace arraysz = model.bijection.ordering.size samples_4_fit = np.zeros((arraysz, N1)) samples_4_iter = np.zeros((arraysz, N2)) for var in vars: varmap = model.bijection.ordering.by_name[var.name] neff_list = dict() if ADVI == True: x = mtrace[0:N1_][var.name] samples_4_fit[varmap.slc, :] = x else: x = mtrace[0:N1_][var.name] samples_4_fit[varmap.slc, :] = x.reshape( (x.shape[0], np.prod(x.shape[1:], dtype=int))).T if ADVI == True: x2 = trace2[0:][var.name] samples_4_iter[varmap.slc, :] = x2 neff_list.update(pm.effective_n(trace2[0:], varnames=[var.name])) else: x2 = mtrace[N1_:][var.name] samples_4_iter[varmap.slc, :] = x2.reshape( (x2.shape[0], np.prod(x2.shape[1:], dtype=int))).T neff_list.update(pm.effective_n(mtrace[N1_:], varnames=[var.name])) neff = pm.stats.dict2pd(neff_list, 'temp').median() m = np.mean(samples_4_fit, axis=1) V = np.cov(samples_4_fit) if np.all(np.linalg.eigvals(V) > 0): L = chol(V, lower=True) else: print('SDP converting') V = sdp.nearPD(V) L = chol(V, lower=True) print('m: ', np.sum(np.isinf(m[:, None]))) gen_samples = m[:, None] + dot( L, st.norm.rvs(0, 1, size=samples_4_iter.shape)) print('gen_samples: ', np.sum(np.isinf(gen_samples))) #gen_samples[gen_samples == inf] = 0 # Evaluate proposal distribution for posterior & generated samples q12 = st.multivariate_normal.logpdf(samples_4_iter.T, m, V) q22 = st.multivariate_normal.logpdf(gen_samples.T, m, V) print('q12: ', np.sum(np.isinf(q12))) print('q22: ', np.sum(np.isinf(q22))) # Evaluate unnormalized posterior for posterior & generated samples q11 = np.asarray([logp(point) for point in samples_4_iter.T]) q21 = np.asarray([logp(point) for point in gen_samples.T]) q21[np.isneginf(q21)] = -100000 q11[np.isneginf(q11)] = -100000 def iterative_scheme(q11, q12, q21, q22, r0, neff, tol, maxiter, criterion): l1 = q11 - q12 l2 = q21 - q22 lstar = np.median(l1) # To increase numerical stability, # subtracting the median of l1 from l1 & l2 later print('neef: ', neff) s1 = neff / (neff + N2) s2 = N2 / (neff + N2) r = r0 r_vals = [r] logml = np.log(r) + lstar criterion_val = 1 + tol i = 0 while (i <= maxiter) & (criterion_val > tol): print('i: ', i) print('maxiter', maxiter) print('criterionval: ', criterion_val) print('tol: ', tol) rold = r logmlold = logml numi = np.exp(l2 - lstar) / (s1 * np.exp(l2 - lstar) + s2 * r) print('l2: ', l2) print('lstar: ', lstar) print('s1: ', s1) print('r :', r) print('Num: ', numi) deni = 1 / (s1 * np.exp(l1 - lstar) + s2 * r) print('Den: ', deni) if np.sum(~np.isfinite(numi)) + np.sum(~np.isfinite(deni)) > 0: warn("""Infinite value in iterative scheme, returning NaN. 
Try rerunning with more samples.""") r = (N1 / N2) * np.sum(numi) / np.sum(deni) print('r: ', r) r_vals.append(r) logml = np.log(r) + lstar print('Logml: ', logml) i += 1 if criterion == 'r': criterion_val = np.abs((r - rold) / r) elif criterion == 'logml': criterion_val = np.abs((logml - logmlold) / logml) print('criterion val: ', criterion_val) if i >= maxiter: return dict(logml=np.NaN, niter=i, r_vals=np.asarray(r_vals)) else: return dict(logml=logml, niter=i) tmp = iterative_scheme(q11, q12, q21, q22, r0, neff, tol1, maxiter, 'r') if ~np.isfinite(tmp['logml']): warn("""logml could not be estimated within maxiter, rerunning with adjusted starting value. Estimate might be more variable than usual.""" ) # use geometric mean as starting value r0_2 = np.sqrt(tmp['r_vals'][-2] * tmp['r_vals'][-1]) tmp = iterative_scheme(q11, q12, q21, q22, r0_2, neff, tol2, maxiter, 'r') return dict(logml=tmp['logml'], niter=tmp['niter'], method="normal", q11=q11, q12=q12, q21=q21, q22=q22)
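# Illustrative usage sketch (not from the original source): `mtrace` is assumed to
# come from pm.sample() with several chains on `model`.
def _example_marginal_llk(mtrace, model):
    result = Marginal_llk(mtrace, model=model, maxiter=1000)
    return result['logml']  # bridge-sampling estimate of the marginal log-likelihood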
def sample_smc( draws=2000, kernel="metropolis", n_steps=25, start=None, tune_steps=True, p_acc_rate=0.85, threshold=0.5, save_sim_data=False, save_log_pseudolikelihood=True, model=None, random_seed=-1, parallel=False, chains=None, cores=None, ): r""" Sequential Monte Carlo based sampling. Parameters ---------- draws: int The number of samples to draw from the posterior (i.e. last stage). And also the number of independent chains. Defaults to 2000. kernel: str Kernel method for the SMC sampler. Available option are ``metropolis`` (default) and `ABC`. Use `ABC` for likelihood free inference together with a ``pm.Simulator``. n_steps: int The number of steps of each Markov Chain. If ``tune_steps == True`` ``n_steps`` will be used for the first stage and for the others it will be determined automatically based on the acceptance rate and `p_acc_rate`, the max number of steps is ``n_steps``. start: dict, or array of dict Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. tune_steps: bool Whether to compute the number of steps automatically or not. Defaults to True p_acc_rate: float Used to compute ``n_steps`` when ``tune_steps == True``. The higher the value of ``p_acc_rate`` the higher the number of steps computed automatically. Defaults to 0.85. It should be between 0 and 1. threshold: float Determines the change of beta from stage to stage, i.e.indirectly the number of stages, the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. It should be between 0 and 1. save_sim_data : bool Whether or not to save the simulated data. This parameter only works with the ABC kernel. The stored data corresponds to a samples from the posterior predictive distribution. save_log_pseudolikelihood : bool Whether or not to save the log pseudolikelihood values. This parameter only works with the ABC kernel. The stored data can be used to compute LOO or WAIC values. Computing LOO/WAIC values from log pseudolikelihood values is experimental. model: Model (optional if in ``with`` context)). random_seed: int random seed parallel: bool Distribute computations across cores if the number of cores is larger than 1. Defaults to False. cores : int The number of chains to run in parallel. If ``None``, set to the number of CPUs in the system, but at most 4. chains : int The number of chains to sample. Running independent chains is important for some convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever is larger. Notes ----- SMC works by moving through successive stages. At each stage the inverse temperature :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0 we have the prior distribution and when :math:`\beta` =1 we have the posterior distribution. So in more general terms we are always computing samples from a tempered posterior that we can write as: .. math:: p(\theta \mid y)_{\beta} = p(y \mid \theta)^{\beta} p(\theta) A summary of the algorithm is: 1. Initialize :math:`\beta` at zero and stage at zero. 2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the tempered posterior is the prior). 3. Increase :math:`\beta` in order to make the effective sample size equals some predefined value (we use :math:`Nt`, where :math:`t` is 0.5 by default). 4. Compute a set of N importance weights W. 
The weights are computed as the ratio of the likelihoods of a sample at stage i+1 and stage i. 5. Obtain :math:`S_{w}` by re-sampling according to W. 6. Use W to compute the mean and covariance for the proposal distribution, a MVNormal. 7. For stages other than 0 use the acceptance rate from the previous stage to estimate `n_steps`. 8. Run N independent Metropolis-Hastings (IMH) chains (each one of length `n_steps`), starting each one from a different sample in :math:`S_{w}`. Samples are IMH as the proposal mean is the of the previous posterior stage and not the current point in parameter space. 9. Repeat from step 3 until :math:`\beta \ge 1`. 10. The final result is a collection of N samples from the posterior. References ---------- .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013), Bayesian inversion for finite fault earthquake source models I- Theory and algorithm. Geophysical Journal International, 2013, 194(3), pp.1701-1726, `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__ .. [Ching2007] Ching, J. and Chen, Y. (2007). Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816), 816-832. `link <http://ascelibrary.org/doi/abs/10.1061/%28ASCE%290733-9399 %282007%29133:7%28816%29>`__ """ _log = logging.getLogger("pymc3") _log.info("Initializing SMC sampler...") model = modelcontext(model) if model.name: raise NotImplementedError( "The SMC implementation currently does not support named models. " "See https://github.com/pymc-devs/pymc3/pull/4365.") if cores is None: cores = _cpu_count() if chains is None: chains = max(2, cores) elif chains == 1: cores = 1 _log.info(f"Sampling {chains} chain{'s' if chains > 1 else ''} " f"in {cores} job{'s' if cores > 1 else ''}") if random_seed == -1: random_seed = None if chains == 1 and isinstance(random_seed, int): random_seed = [random_seed] if random_seed is None or isinstance(random_seed, int): if random_seed is not None: np.random.seed(random_seed) random_seed = [np.random.randint(2**30) for _ in range(chains)] if not isinstance(random_seed, Iterable): raise TypeError( "Invalid value for `random_seed`. Must be tuple, list or int") if kernel.lower() == "abc": if len(model.observed_RVs) != 1: warnings.warn( "SMC-ABC only works properly with models with one observed variable" ) if model.potentials: _log.info("Potentials will be added to the prior term") params = ( draws, kernel, n_steps, start, tune_steps, p_acc_rate, threshold, save_sim_data, save_log_pseudolikelihood, model, ) t1 = time.time() if parallel and chains > 1: loggers = [_log] + [None] * (chains - 1) pool = mp.Pool(cores) results = pool.starmap(sample_smc_int, [(*params, random_seed[i], i, loggers[i]) for i in range(chains)]) pool.close() pool.join() else: results = [] for i in range(chains): results.append(sample_smc_int(*params, random_seed[i], i, _log)) ( traces, sim_data, log_marginal_likelihoods, log_pseudolikelihood, betas, accept_ratios, nsteps, ) = zip(*results) trace = MultiTrace(traces) trace.report._n_draws = draws trace.report._n_tune = 0 trace.report.log_marginal_likelihood = np.array(log_marginal_likelihoods) trace.report.log_pseudolikelihood = log_pseudolikelihood trace.report.betas = betas trace.report.accept_ratios = accept_ratios trace.report.nsteps = nsteps trace.report._t_sampling = time.time() - t1 if save_sim_data: return trace, { modelcontext(model).observed_RVs[0].name: np.array(sim_data) } else: return trace
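# Illustrative usage sketch (not from the original source), on a toy model.
def _example_sample_smc():
    import numpy as np
    import pymc3 as pm
    with pm.Model():
        mu = pm.Normal('mu', mu=0.0, sd=5.0)
        pm.Normal('obs', mu=mu, sd=1.0, observed=np.random.randn(100))
        trace = sample_smc(draws=1000, chains=2, random_seed=7)
    return trace.report.log_marginal_likelihood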
def opt_nfo( #Optimization parameters #initialization n0=10, #int, n0 the initial number of draws init_samples=None, #array, Whether to provide some pre-defined sequence or do pymc3 sampling #approximation k_trunc=np.inf, #IW clipping, not used by default eps_z=0.01, #float, tolerance on Z for q iter convergence (eps') #currently not used since not iterating SINF unless trainable nf_iter=1, #int, number of NF iters -should always be 1 in our implementation #annealing N=10, #int, N the TOTAL number of draws we want at each iteration - this is no longer used, is from when we used to run multiple fits t_ess=0.5, #float, ESS<t_ess*n0 t threshold on ESS for ESS3 (no longer temperature) g_AF=0, #float, size of gradient contribution to AF, not used now #exploration N_AF=1000, #int,number of points to use in q_w sampling for AF expl_top_AF=1, #int,cut for the top AF at a given temp level accepted at each beta expl_latent=0, #int,latent draw from around top IW1 or around random draw from q_w, accepted at each step expl_top_qw=0, #int,keep top q_w at this iteration beta_max=1, #float>0,highest exponent on tempered posterior, support >1 for exploitation rel_beta=1, #0<float<1, β2 = rel_beta*β, where β2 is the lower temp level used for sampling q_w, what we call 'X' frac_rel_beta_AF=1, #int, the modifier to the AF used to up/down-weight the w vs uw contribution, what we call "Y" latent_sigma=None, #float, the value of l use_latent_beta2=False, #whether to get the latent sample from q_w(β2) or from q_uw use_pq_beta_IW1=False, #whether to get the latent sample from near top IW1 or randomly from q_w bounds=None, #array, size 2xd, bounding box for samples FIXME make this more obvious, needed for prior N_temp=25, #int, cutoff on number of allowed temp iterations before giving up -> #FIXME eventually make this throw error #NF parameters model=None, frac_validate=0.0, iteration=None, alpha_w=(0, 0), alpha_uw=(0, 0), verbose=False, n_component=None, interp_nbin=None, KDE=True, bw_factor_min=1.0, bw_factor_max=1.0, bw_factor_num=1, rel_bw=1, edge_bins=None, ndata_wT=None, MSWD_max_iter=None, NBfirstlayer=True, logit=False, Whiten=False, trainable_qw=False, #whether to improve our q_w at each beta iteration with SGD sgd_steps=0, #number of steps used in Adam when training trainable q_w knots_trainable=5, batchsize=None, nocuda=False, patch=False, shape=[28, 28, 1], #Runtime random_seed=-1, parallel=False, cores=None): r""" Normalizing flow-based Bayesian Optimization. Parameters ---------- draws: int The number of samples to draw from the posterior (i.e. last stage). And also the number of independent chains. Defaults to 2000. norm_tol: float Fractional difference in the evidence estimate between two steps. If it falls below this we stop iterating over the NF fits. optim_iter: int Maximum number of optimization steps to run during the initialization. nf_iter: int Number of NF fit iterations to go through after the optimization step. model: Model (optional if in ``with`` context)). frac_validate: float Fraction of the live points at each NS iteration that we use for validation of the NF fit. alpha: tuple of floats Regularization parameters used for the NF fit. verbose: boolean Whether you want verbose output from the NF fit. random_seed: int random seed parallel: bool Distribute computations across cores if the number of cores is larger than 1. Defaults to False. cores : int Number of cores available for the optimization step. Defaults to None, in which case the CPU count is used. 
""" _log = logging.getLogger("pymc3") _log.info("Initializing normalizing flow-based optimization...") model = modelcontext(model) if model.name: raise NotImplementedError( "The NS_NFO implementation currently does not support named models. " "See https://github.com/pymc-devs/pymc3/pull/4365.") if cores is None: cores = _cpu_count() chains = 1 _log.info(f"Sampling {chains} chain{'s' if chains > 1 else ''} " f"Cores available for optimization: {cores}") if random_seed == -1: random_seed = None if chains == 1 and isinstance(random_seed, int): random_seed = [random_seed] if random_seed is None or isinstance(random_seed, int): if random_seed is not None: np.random.seed(random_seed) random_seed = [np.random.randint(2**30) for _ in range(chains)] if not isinstance(random_seed, Iterable): raise TypeError( "Invalid value for `random_seed`. Must be tuple, list or int") #we changed the name for end-user-facing readability, but internally more familiar with these names aN, bN, cN, dN = N_AF, expl_top_AF, expl_latent, expl_top_qw params = ( n0, init_samples, k_trunc, eps_z, nf_iter, N, t_ess, g_AF, aN, bN, cN, dN, beta_max, rel_beta, frac_rel_beta_AF, latent_sigma, use_latent_beta2, use_pq_beta_IW1, bounds, N_temp, model, frac_validate, iteration, alpha_w, alpha_uw, cores, verbose, n_component, interp_nbin, KDE, bw_factor_min, bw_factor_max, bw_factor_num, rel_bw, edge_bins, ndata_wT, MSWD_max_iter, NBfirstlayer, logit, Whiten, trainable_qw, sgd_steps, knots_trainable, batchsize, nocuda, patch, shape, parallel, ) t1 = time.time() results = [] for i in range(chains): results.append(opt_nfo_int(*params, random_seed[i], i, _log)) ( traces, log_evidence, q_samples, importance_weights, logp, logq, train_logp, train_logq, logZ, q_models, q_ess, total_ess, min_var_bws, min_pq_bws, betas, ) = zip(*results) trace = MultiTrace(traces) trace.report.log_evidence = log_evidence trace.report.q_samples = q_samples trace.report.importance_weights = importance_weights trace.report.logp = logp trace.report.logq = logq trace.report.train_logp = train_logp trace.report.train_logq = train_logq trace.report.logZ = logZ trace.report.q_models = q_models trace.report.q_ess = q_ess trace.report.total_ess = total_ess trace.report.N = N trace.report.min_var_bws = min_var_bws trace.report.min_pq_bws = min_pq_bws trace.report._t_sampling = time.time() - t1 trace.report.betas = betas return trace
def sample_ns_nfmc( draws=2000, start=None, rho=0.01, epsilon=0.01, model=None, frac_validate=0.8, alpha=(0,0), verbose=False, random_seed=-1, parallel=False, chains=None, cores=None, ): r""" Normalizing flow based nested sampling. Parameters ---------- draws: int The number of samples to draw from the posterior (i.e. last stage). And also the number of independent chains. Defaults to 2000. start: dict, or array of dict Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. rho: float Sets fraction of points we want to be above the likelihood threshold at each iteration. Used to adaptively set the likelihood threshold during sampling. epsilon: float Stopping factor for the algorithm. At each iteration we compare the ratio of the evidences from the current and previous iterations. If it is less than 1-epsilon we stop. model: Model (optional if in ``with`` context)). frac_validate: float Fraction of the live points at each NS iteration that we use for validation of the NF fit. alpha: tuple of floats Regularization parameters used for the NF fit. verbose: boolean Whether you want verbose output from the NF fit. random_seed: int random seed parallel: bool Distribute computations across cores if the number of cores is larger than 1. Defaults to False. cores : int The number of chains to run in parallel. If ``None``, set to the number of CPUs in the system, but at most 4. chains : int The number of chains to sample. Running independent chains is important for some convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever is larger. """ _log = logging.getLogger("pymc3") _log.info("Initializing normalizing flow based nested sampling...") model = modelcontext(model) if model.name: raise NotImplementedError( "The NS_NFMC implementation currently does not support named models. " "See https://github.com/pymc-devs/pymc3/pull/4365." ) if cores is None: cores = _cpu_count() if chains is None: chains = max(2, cores) elif chains == 1: cores = 1 _log.info( f"Sampling {chains} chain{'s' if chains > 1 else ''} " f"in {cores} job{'s' if cores > 1 else ''}" ) if random_seed == -1: random_seed = None if chains == 1 and isinstance(random_seed, int): random_seed = [random_seed] if random_seed is None or isinstance(random_seed, int): if random_seed is not None: np.random.seed(random_seed) random_seed = [np.random.randint(2 ** 30) for _ in range(chains)] if not isinstance(random_seed, Iterable): raise TypeError("Invalid value for `random_seed`. Must be tuple, list or int") params = ( draws, start, rho, epsilon, model, frac_validate, alpha, verbose, ) t1 = time.time() if parallel and chains > 1: loggers = [_log] + [None] * (chains - 1) pool = mp.Pool(cores) results = pool.starmap( sample_ns_nfmc_int, [(*params, random_seed[i], i, loggers[i]) for i in range(chains)] ) pool.close() pool.join() else: results = [] for i in range(chains): results.append(sample_ns_nfmc_int(*params, random_seed[i], i, _log)) ( traces, log_evidence, log_evidences, likelihood_logp_thresh, ) = zip(*results) trace = MultiTrace(traces) trace.report._n_draws = draws trace.report.log_evidence = np.array(log_evidence) trace.report._t_sampling = time.time() - t1 return trace
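# Example (illustrative sketch): hypothetical use of `sample_ns_nfmc` defined
# above on a small model, with two chains run in parallel. The data and
# settings are made up for illustration.
import numpy as np
import pymc3 as pm

observed = np.random.default_rng(0).normal(loc=0.5, scale=1.0, size=40)
with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sigma=10.0)
    sigma = pm.HalfNormal("sigma", sigma=5.0)
    pm.Normal("obs", mu=mu, sigma=sigma, observed=observed)
    trace = sample_ns_nfmc(draws=1000, rho=0.01, epsilon=0.01,
                           frac_validate=0.8, parallel=True,
                           chains=2, random_seed=123)
# trace.report.log_evidence holds the per-chain evidence estimates.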
def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None,
                 model=None, random_seed=-1, overwrite=True,
                 update_proposal=False, keep_last=False):
    """
    Modified from :func:`pymc3.sampling._iter_sample`.

    tune : int
        Adaptive step-size scaling is stopped after this many samples in the chain.
    """
    model = modelcontext(model)
    draws = int(draws)
    if draws < 1:
        raise ValueError('Argument `draws` must be greater than 0.')
    if start is None:
        start = {}
    if random_seed != -1:
        seed(random_seed)
    try:
        step = CompoundStep(step)
    except TypeError:
        pass
    point = Point(start, model=model)
    step.chain_index = chain
    trace.setup(draws, chain, overwrite=overwrite)
    for i in range(draws):
        if i == tune:
            step = stop_tuning(step)
        logger.debug('Step: Chain_%i step_%i' % (chain, i))
        point, out_list = step.step(point)
        try:
            trace.buffer_write(out_list, step.cumulative_samples)
        except BufferError:
            # buffer full
            last_sample = deepcopy(trace.buffer[-1])
            if update_proposal:
                # only valid for PT for now
                if step.proposal_name in multivariate_proposals:
                    cov = trace.get_sample_covariance(step)
                    if cov is not None:
                        if not isinstance(trace, MemoryChain):
                            filename = '%s/proposal_cov_chain_%i_%i.%s' % (
                                trace.dir_path, trace.chain,
                                trace.cov_counter, 'png')
                            from matplotlib import pyplot as plt
                            fig, axs = plt.subplots(1, 1)
                            im = axs.imshow(cov, aspect='auto')
                            plt.colorbar(im)
                            fig.savefig(filename, dpi=150)
                            plt.close(fig)
                        step.proposal_dist = choose_proposal(
                            step.proposal_name, scale=cov)
            trace.record_buffer()
            if keep_last:
                # put the last sample back into the buffer
                trace.buffer_write(*last_sample)
        yield trace
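# Example (illustrative sketch) of how a caller drives the generator above.
# `chain_trace` stands in for a backend chain object exposing
# `setup`/`buffer_write`/`record_buffer` as used inside `_iter_sample`, `step`
# for an initialized step method, and `model` for the pymc3 model; the concrete
# classes come from the surrounding package and are not shown here.
sampling = _iter_sample(draws=1000, step=step, start={}, trace=chain_trace,
                        chain=0, tune=500, model=model, random_seed=42)
for partial_trace in sampling:
    pass  # the same trace object is yielded after each completed step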
def __init__( self, *, trace=None, prior=None, posterior_predictive=None, log_likelihood=True, predictions=None, coords: Optional[CoordSpec] = None, dims: Optional[DimSpec] = None, model=None, save_warmup: Optional[bool] = None, density_dist_obs: bool = True, index_origin: Optional[int] = None, ): self.save_warmup = rcParams[ "data.save_warmup"] if save_warmup is None else save_warmup self.trace = trace # this permits us to get the model from command-line argument or from with model: self.model = modelcontext(model) self.attrs = None if trace is not None: self.nchains = trace.nchains if hasattr(trace, "nchains") else 1 if hasattr(trace.report, "n_draws") and trace.report.n_draws is not None: self.ndraws = trace.report.n_draws self.attrs = { "sampling_time": trace.report.t_sampling, "tuning_steps": trace.report.n_tune, } else: self.ndraws = len(trace) if self.save_warmup: warnings.warn( "Warmup samples will be stored in posterior group and will not be" " excluded from stats and diagnostics." " Do not slice the trace manually before conversion", UserWarning, ) self.ntune = len(self.trace) - self.ndraws self.posterior_trace, self.warmup_trace = self.split_trace() else: self.nchains = self.ndraws = 0 self.prior = prior self.posterior_predictive = posterior_predictive self.log_likelihood = log_likelihood self.predictions = predictions self.index_origin = rcParams[ "data.index_origin"] if index_origin is None else index_origin def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray: return next(iter(dct.values())) if trace is None: # if you have a posterior_predictive built with keep_dims, # you'll lose here, but there's nothing I can do about that. self.nchains = 1 get_from = None if predictions is not None: get_from = predictions elif posterior_predictive is not None: get_from = posterior_predictive elif prior is not None: get_from = prior if get_from is None: # pylint: disable=line-too-long raise ValueError( "When constructing InferenceData must have at least" " one of trace, prior, posterior_predictive or predictions." ) aelem = arbitrary_element(get_from) self.ndraws = aelem.shape[0] self.coords = {} if coords is None else coords if hasattr(self.model, "coords"): self.coords = {**self.model.coords, **self.coords} self.coords = { key: value for key, value in self.coords.items() if value is not None } self.dims = {} if dims is None else dims if hasattr(self.model, "RV_dims"): model_dims = { var_name: [dim for dim in dims if dim is not None] for var_name, dims in self.model.RV_dims.items() } self.dims = {**model_dims, **self.dims} self.density_dist_obs = density_dist_obs self.observations = self.find_observations()
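# Example (illustrative sketch): the converter above is typically reached via
# `arviz.from_pymc3`; this shows passing `coords`/`dims` so named dimensions
# survive the conversion. The model is hypothetical.
import arviz as az
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal("mu", 0.0, 5.0)
    theta = pm.Normal("theta", mu=mu, sigma=1.0, shape=3)
    pm.Normal("obs", mu=theta, sigma=1.0, observed=np.zeros(3))
    trace = pm.sample(200, tune=200, chains=2, return_inferencedata=False)
    idata = az.from_pymc3(trace, coords={"school": ["A", "B", "C"]},
                          dims={"theta": ["school"], "obs": ["school"]})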
def smc_sample( n_steps, step=None, start=None, homepath=None, chain=0, stage=0, n_jobs=1, tune=None, progressbar=False, buffer_size=5000, model=None, update=None, random_seed=None, rm_flag=False): """ Sequential Monte Carlo samlping Samples the solution space with n_chains of Metropolis chains, where each chain has n_steps iterations. Once finished, the sampled traces are evaluated: (1) Based on the likelihoods of the final samples, chains are weighted (2) the weighted covariance of the ensemble is calculated and set as new proposal distribution (3) the variation in the ensemble is calculated and the next tempering parameter (beta) calculated (4) New n_chains Metropolis chains are seeded on the traces with high weight for n_steps iterations (5) Repeat until beta > 1. Parameters ---------- n_steps : int The number of samples to draw for each Markov-chain per stage step : :class:`SMC` SMC initialisation object start : List of dictionaries with length of (n_chains) Starting points in parameter space (or partial point) Defaults to random draws from variables (defaults to empty dict) chain : int Chain number used to store sample in backend. If `n_jobs` is greater than one, chain numbers will start here. stage : int Stage where to start or continue the calculation. It is possible to continue after completed stages (stage should be the number of the completed stage + 1). If None the start will be at stage = 0. n_jobs : int The number of cores to be used in parallel. Be aware that theano has internal parallelisation. Sometimes this is more efficient especially for simple models. step.n_chains / n_jobs has to be an integer number! tune : int Number of iterations to tune, if applicable (defaults to None) homepath : string Result_folder for storing stages, will be created if not existing. progressbar : bool Flag for displaying a progress bar buffer_size : int this is the number of samples after which the buffer is written to disk or if the chain end is reached model : :class:`pymc3.Model` (optional if in `with` context) has to contain deterministic variable name defined under step.likelihood_name' that contains the model likelihood update : :py:class:`models.Problem` Problem object that contains all the observed data and (if applicable) covariances to be updated each transition step. rm_flag : bool If True existing stage result folders are being deleted prior to sampling. References ---------- .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013), Bayesian inversion for finite fault earthquake source models I- Theory and algorithm. Geophysical Journal International, 2013, 194(3), pp.1701-1726, `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__ """ model = modelcontext(model) step.n_steps = int(n_steps) if n_steps < 1: raise TypeError('Argument `n_steps` should be above 0.', exc_info=1) if step is None: raise TypeError('Argument `step` has to be a SMC step object.') if homepath is None: raise TypeError( 'Argument `homepath` should be path to result_directory.') if n_jobs > 1: if not (step.n_chains / float(n_jobs)).is_integer(): raise ValueError('n_chains / n_jobs has to be a whole number!') if start is not None: if len(start) != step.n_chains: raise TypeError('Argument `start` should have dicts equal the ' 'number of chains (step.N-chains)') else: step.population = start if not any( step.likelihood_name in var.name for var in model.deterministics): raise TypeError('Model (deterministic) variables need to contain ' 'a variable %s ' 'as defined in `step`.' 
% step.likelihood_name) stage_handler = backend.TextStage(homepath) chains, step, update = init_stage( stage_handler=stage_handler, step=step, stage=stage, progressbar=progressbar, update=update, model=model, rm_flag=rm_flag) with model: while step.beta < 1.: if step.stage == 0: # Initial stage logger.info('Sample initial stage: ...') draws = 1 else: draws = n_steps logger.info('Beta: %f Stage: %i' % (step.beta, step.stage)) # Metropolis sampling intermediate stages chains = stage_handler.clean_directory(step.stage, chains, rm_flag) sample_args = { 'draws': draws, 'step': step, 'stage_path': stage_handler.stage_path(step.stage), 'progressbar': progressbar, 'model': model, 'n_jobs': n_jobs, 'chains': chains, 'buffer_size': buffer_size} mtrace = iter_parallel_chains(**sample_args) step.population, step.array_population, step.likelihoods = \ step.select_end_points(mtrace) if update is not None: logger.info('Updating Covariances ...') mean_pt = step.mean_end_points() update.update_weights(mean_pt, n_jobs=n_jobs) mtrace = update_last_samples( homepath, step, progressbar, model, n_jobs, rm_flag) step.population, step.array_population, step.likelihoods = \ step.select_end_points(mtrace) step.beta, step.old_beta, step.weights = step.calc_beta() if step.beta > 1.: logger.info('Beta > 1.: %f' % step.beta) step.beta = 1. outparam_list = [step.get_sampler_state(), update] stage_handler.dump_atmip_params(step.stage, outparam_list) if stage == -1: chains = [] else: chains = None else: step.covariance = step.calc_covariance() step.proposal_dist = choose_proposal( step.proposal_name, scale=step.covariance) step.resampling_indexes = step.resample() step.chain_previous_lpoint = \ step.get_chain_previous_lpoint(mtrace) outparam_list = [step.get_sampler_state(), update] stage_handler.dump_atmip_params(step.stage, outparam_list) step.stage += 1 del(mtrace) # Metropolis sampling final stage logger.info('Sample final stage') step.stage = -1 temp = np.exp((1 - step.old_beta) * (step.likelihoods - step.likelihoods.max())) step.weights = temp / np.sum(temp) step.covariance = step.calc_covariance() step.proposal_dist = choose_proposal( step.proposal_name, scale=step.covariance) step.resampling_indexes = step.resample() step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace) sample_args['step'] = step sample_args['stage_path'] = stage_handler.stage_path(step.stage) sample_args['chains'] = chains iter_parallel_chains(**sample_args) outparam_list = [step.get_sampler_state(), update] stage_handler.dump_atmip_params(step.stage, outparam_list) logger.info('Finished sampling!')
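# Example (illustrative sketch): a hypothetical driver for `smc_sample` above,
# pairing it with the ATMCMC step class defined further below. The toy
# likelihood expression is made up; the only requirement is a deterministic
# variable whose name matches `step.likelihood_name`.
import pymc3 as pm

with pm.Model() as toy_model:
    x = pm.Normal("x", mu=0.0, sigma=1.0)
    pm.Deterministic("like", -0.5 * (x - 1.0) ** 2)
    step = ATMCMC(n_chains=500, tune_interval=25,
                  likelihood_name="like", model=toy_model)

smc_sample(n_steps=100, step=step, homepath="./smc_results", n_jobs=4,
           progressbar=True, model=toy_model, rm_flag=True)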
def sample( *, draws=1000, tune=1000, model=None, step_kwargs=None, warmup_window=50, adapt_window=50, cooldown_window=100, initial_accept=None, target_accept=0.9, gamma=0.05, k=0.75, t0=10, **kwargs, ): # Check that we're in a model context and that all the variables are # continuous model = modelcontext(model) if not all_continuous(model.vars): raise ValueError("NUTS can only be used for models with only " "continuous variables.") start = kwargs.get("start", None) if start is None: start = model.test_point mean = model.dict_to_array(start) update_steps = build_schedule( tune, warmup_window=warmup_window, adapt_window=adapt_window, cooldown_window=cooldown_window, ) potential = QuadPotentialDenseAdapt( model.ndim, initial_mean=mean, initial_weight=10, update_steps=update_steps, ) if "step" in kwargs: step = kwargs["step"] else: if step_kwargs is None: step_kwargs = {} step = pm.NUTS( potential=potential, model=model, target_accept=target_accept, **step_kwargs, ) if "target_accept" in step_kwargs and target_accept is not None: raise ValueError( "'target_accept' cannot be given as a keyword argument and in " "'step_kwargs'") target_accept = step_kwargs.pop("target_accept", target_accept) if initial_accept is None: target = target_accept else: if initial_accept > target_accept: raise ValueError( "initial_accept must be less than or equal to target_accept") target = initial_accept + (target_accept - initial_accept) * np.sqrt( np.arange(len(update_steps)) / (len(update_steps) - 1)) step.step_adapt = WindowedDualAverageAdaptation(update_steps, step.step_size, target, gamma, k, t0) kwargs["step"] = step return pm.sample(draws=draws, tune=tune, model=model, **kwargs)
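# Numeric illustration of the acceptance-target ramp applied above when
# `initial_accept` is given: the target rises from `initial_accept` to
# `target_accept` over the adaptation windows following a square-root schedule.
# `n_windows` stands in for len(update_steps).
import numpy as np

initial_accept, target_accept = 0.6, 0.9
n_windows = 5
ramp = initial_accept + (target_accept - initial_accept) * np.sqrt(
    np.arange(n_windows) / (n_windows - 1))
print(ramp)  # [0.6, 0.75, 0.812..., 0.859..., 0.9]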
def __init__(self, vars=None, out_vars=None, covariance=None, scale=1., n_chains=100, tune=True, tune_interval=100, model=None, check_bound=True, likelihood_name='like', proposal_name='MultivariateNormal', coef_variation=1., **kwargs): model = modelcontext(model) if vars is None: vars = model.vars vars = inputvars(vars) if out_vars is None: out_vars = model.unobserved_RVs out_varnames = [out_var.name for out_var in out_vars] self.scaling = np.atleast_1d(scale) if covariance is None and proposal_name == 'MultivariateNormal': self.covariance = np.eye(sum(v.dsize for v in vars)) scale = self.covariance self.tune = tune self.check_bnd = check_bound self.tune_interval = tune_interval self.steps_until_tune = tune_interval self.proposal_name = proposal_name self.proposal_dist = choose_proposal(self.proposal_name, scale=scale) self.proposal_samples_array = self.proposal_dist(n_chains) self.stage_sample = 0 self.accepted = 0 self.beta = 0 self.stage = 0 self.chain_index = 0 self.resampling_indexes = np.arange(n_chains) self.coef_variation = coef_variation self.n_chains = n_chains self.likelihoods = np.zeros(n_chains) self.likelihood_name = likelihood_name self._llk_index = out_varnames.index(likelihood_name) self.discrete = np.concatenate( [[v.dtype in discrete_types] * (v.dsize or 1) for v in vars]) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() # create initial population self.population = [] self.array_population = np.zeros(n_chains) for i in range(self.n_chains): dummy = pm.Point({v.name: v.random() for v in vars}, model=model) self.population.append(dummy) self.population[0] = model.test_point self.chain_previous_lpoint = copy.deepcopy(self.population) shared = make_shared_replacements(vars, model) self.logp_forw = logp_forw(out_vars, vars, shared) self.check_bnd = logp_forw([model.varlogpt], vars, shared) super(ATMCMC, self).__init__(vars, out_vars, shared)
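# Illustrative sketch mirroring how the constructor above seeds its start
# population: one prior draw per chain, with chain 0 pinned to the model test
# point. The toy model is hypothetical.
import pymc3 as pm
from pymc3.model import Point

with pm.Model() as toy_model:
    a = pm.Normal("a", 0.0, 1.0)
    b = pm.Normal("b", 0.0, 2.0)

population = [Point({v.name: v.random() for v in toy_model.vars},
                    model=toy_model) for _ in range(4)]
population[0] = toy_model.test_point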
def metropolis_sample(n_steps=10000, homepath=None, start=None, backend='csv', progressbar=False, rm_flag=False, buffer_size=5000, buffer_thinning=1, step=None, model=None, n_jobs=1, update=None, burn=0.5, thin=2): """ Execute Metropolis algorithm repeatedly depending on the number of chains. """ # hardcoded stage here as there are no stages stage = 1 model = modelcontext(model) step.n_steps = int(n_steps) if n_steps < 1: raise TypeError('Argument `n_steps` should be above 0.', exc_info=1) if step is None: raise TypeError('Argument `step` has to be a Metropolis step object.') if homepath is None: raise TypeError( 'Argument `homepath` should be path to result_directory.') if n_jobs > 1: if not (step.n_chains / float(n_jobs)).is_integer(): raise Exception('n_chains / n_jobs has to be a whole number!') if start is not None: if len(start) != step.n_chains: raise Exception('Argument `start` should have dicts equal the ' 'number of chains (step.N-chains)') else: step.population = start if not any(step.likelihood_name in var.name for var in model.deterministics): raise Exception('Model (deterministic) variables need to contain ' 'a variable %s ' 'as defined in `step`.' % step.likelihood_name) stage_handler = backend.SampleStage(homepath, backend=step.backend) util.ensuredir(homepath) chains, step, update = init_stage( stage_handler=stage_handler, step=step, stage=0, # needs zero otherwise tries to load stage_0 results progressbar=progressbar, update=update, model=model, rm_flag=rm_flag) with model: chains = stage_handler.clean_directory(step.stage, chains, rm_flag) logger.info('Sampling stage ...') draws = n_steps step.stage = stage sample_args = { 'draws': draws, 'step': step, 'stage_path': stage_handler.stage_path(step.stage), 'progressbar': progressbar, 'model': model, 'n_jobs': n_jobs, 'buffer_size': buffer_size, 'buffer_thinning': buffer_thinning, 'chains': chains } mtrace = iter_parallel_chains(**sample_args) if step.proposal_name == 'MultivariateNormal': pdict, step.covariance = get_trace_stats(mtrace, step, burn, thin) step.proposal_dist = choose_proposal(step.proposal_name, scale=step.covariance) if update is not None: logger.info('Updating Covariances ...') update.update_weights(pdict['dist_mean'], n_jobs=n_jobs) mtrace = update_last_samples(homepath, step, progressbar, model, n_jobs, rm_flag) elif update is not None and stage == 0: update.engine.close_cashed_stores() step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace) outparam_list = [step.get_sampler_state(), update] stage_handler.dump_atmip_params(step.stage, outparam_list) # get_final_stage(homepath, n_stages, model=model) return stage_handler.load_multitrace(step.stage, model=model)
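# Example (illustrative sketch): a hypothetical call into `metropolis_sample`
# above. `met_step` stands in for a Metropolis step object (see the constructor
# further below) built inside the model context; the burn-in fraction and
# thinning control how the proposal covariance is re-estimated from the
# finished chains.
mtrace = metropolis_sample(n_steps=20000, step=met_step,
                           homepath="./metropolis_results", n_jobs=2,
                           burn=0.5, thin=2, rm_flag=True)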
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, model=None, blocked=True, potential=None, dtype=None, Emax=1000, target_accept=0.8, gamma=0.05, k=0.75, t0=10, adapt_step_size=True, step_rand=None, **aesara_kwargs): """Set up Hamiltonian samplers with common structures. Parameters ---------- vars: list of aesara variables scaling: array_like, ndim = {1,2} Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. step_scale: float, default=0.25 Size of steps to take, automatically scaled down by 1/n**(1/4) is_cov: bool, default=False Treat scaling as a covariance matrix/vector if True, else treat it as a precision matrix/vector model: pymc3 Model instance blocked: bool, default=True potential: Potential, optional An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. **aesara_kwargs: passed to aesara functions """ self._model = modelcontext(model) if vars is None: vars = self._model.cont_vars vars = inputvars(vars) super().__init__(vars, blocked=blocked, model=model, dtype=dtype, **aesara_kwargs) self.adapt_step_size = adapt_step_size self.Emax = Emax self.iter_count = 0 size = self._logp_dlogp_func.size self.step_size = step_scale / (size**0.25) self.step_adapt = step_sizes.DualAverageAdaptation( self.step_size, target_accept, gamma, k, t0) self.target_accept = target_accept self.tune = True if scaling is None and potential is None: mean = floatX(np.zeros(size)) var = floatX(np.ones(size)) potential = QuadPotentialDiagAdapt(size, mean, var, 10) if isinstance(scaling, dict): point = Point(scaling, model=model) scaling = guess_scaling(point, model=model, vars=vars) if scaling is not None and potential is not None: raise ValueError("Can not specify both potential and scaling.") if potential is not None: self.potential = potential else: self.potential = quad_potential(scaling, is_cov) self.integrator = integration.CpuLeapfrogIntegrator( self.potential, self._logp_dlogp_func) self._step_rand = step_rand self._warnings = [] self._samples_after_tune = 0 self._num_divs_sample = 0
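# Quick numeric check of the step-size heuristic used above: the base step size
# shrinks with the number of free parameters as n**(-1/4). `size` stands in for
# self._logp_dlogp_func.size.
size = 100
step_scale = 0.25
print(step_scale / size ** 0.25)  # ~0.079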
def __init__(self, vars=None, out_vars=None, covariance=None, scale=1., n_chains=100, tune=True, tune_interval=100, model=None, check_bound=True, likelihood_name='like', backend='csv', proposal_name='MultivariateNormal', **kwargs): model = modelcontext(model) if vars is None: vars = model.vars vars = inputvars(vars) if out_vars is None: out_vars = model.unobserved_RVs out_varnames = [out_var.name for out_var in out_vars] self.scaling = utility.scalar2floatX(num.atleast_1d(scale)) if covariance is None and proposal_name in multivariate_proposals: self.covariance = num.eye(sum(v.dsize for v in vars)) scale = self.covariance elif covariance is None: scale = num.ones(sum(v.dsize for v in vars)) else: scale = covariance self.tune = tune self.check_bound = check_bound self.tune_interval = tune_interval self.steps_until_tune = tune_interval self.proposal_name = proposal_name self.proposal_dist = choose_proposal(self.proposal_name, scale=scale) self.proposal_samples_array = self.proposal_dist(n_chains) self.stage_sample = 0 self.accepted = 0 self.beta = 1. self.stage = 0 self.chain_index = 0 # needed to use the same parallel implementation function as for SMC self.resampling_indexes = num.arange(n_chains) self.n_chains = n_chains self.likelihood_name = likelihood_name self._llk_index = out_varnames.index(likelihood_name) self.backend = backend self.discrete = num.concatenate( [[v.dtype in discrete_types] * (v.dsize or 1) for v in vars]) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() # create initial population self.population = [] self.array_population = num.zeros(n_chains) for i in range(self.n_chains): self.population.append( Point({v.name: v.random() for v in vars}, model=model)) self.population[0] = model.test_point shared = make_shared_replacements(vars, model) self.logp_forw = logp_forw(out_vars, vars, shared) self.check_bnd = logp_forw([model.varlogpt], vars, shared) super(Metropolis, self).__init__(vars, out_vars, shared) self.chain_previous_lpoint = [[]] * self.n_chains self._tps = None
def draw_value(self, param, trace: Optional[_TraceDict] = None, givens=None): """Draw a set of random values from a distribution or return a constant. Parameters ---------- param: number, array like, theano variable or pymc3 random variable The value or distribution. Constants or shared variables will be converted to an array and returned. Theano variables are evaluated. If `param` is a pymc3 random variable, draw values from it and return that (as ``np.ndarray``), unless a value is specified in the ``trace``. trace: pm.MultiTrace, optional A dictionary from pymc3 variable names to samples of their values used to provide context for evaluating ``param``. givens: dict, optional A dictionary from theano variables to their values. These values are used to evaluate ``param`` if it is a theano variable. """ samples = self.samples def random_sample( meth: Callable[..., np.ndarray], param, point: _TraceDict, size: int, shape: Tuple[int, ...], ) -> np.ndarray: val = meth(point=point, size=size) try: assert val.shape == (size, ) + shape, ( "Sampling from random of %s yields wrong shape" % param) # error-quashing here is *extremely* ugly, but it seems to be what the logic in DensityDist wants. except AssertionError as e: if (hasattr(param, "distribution") and hasattr( param.distribution, "wrap_random_with_dist_shape") and not param.distribution.wrap_random_with_dist_shape): pass else: raise e return val if isinstance(param, (numbers.Number, np.ndarray)): return param elif isinstance(param, theano_constant): return param.value elif isinstance(param, tt.sharedvar.SharedVariable): return param.get_value() elif isinstance(param, (tt.TensorVariable, MultiObservedRV)): if hasattr(param, "model") and trace and param.name in trace.varnames: return trace[param.name] elif hasattr(param, "random") and param.random is not None: model = modelcontext(None) assert isinstance(model, Model) shape: Tuple[int, ...] = tuple(_param_shape(param, model)) return random_sample(param.random, param, point=trace, size=samples, shape=shape) elif (hasattr(param, "distribution") and hasattr(param.distribution, "random") and param.distribution.random is not None): if hasattr(param, "observations"): # shape inspection for ObservedRV dist_tmp = param.distribution try: distshape: Tuple[int, ...] 
= tuple( param.observations.shape.eval()) except AttributeError: distshape = tuple(param.observations.shape) dist_tmp.shape = distshape try: return random_sample( dist_tmp.random, param, point=trace, size=samples, shape=distshape, ) except (ValueError, TypeError): # reset shape to account for shape changes # with theano.shared inputs dist_tmp.shape = () # We want to draw values to infer the dist_shape, # we don't want to store these drawn values to the context with _DrawValuesContextBlocker(): point = trace[0] if trace else None temp_val = np.atleast_1d( dist_tmp.random(point=point, size=None)) # if hasattr(param, 'name') and param.name == 'obs': # import pdb; pdb.set_trace() # Sometimes point may change the size of val but not the # distribution's shape if point and samples is not None: temp_size = np.atleast_1d(samples) if all(temp_val.shape[:len(temp_size)] == temp_size): dist_tmp.shape = tuple( temp_val.shape[len(temp_size):]) else: dist_tmp.shape = tuple(temp_val.shape) # I am not sure why I need to do this, but I do in order to trim off a # degenerate dimension [2019/09/05:rpg] if dist_tmp.shape[0] == 1 and len(dist_tmp.shape) > 1: dist_tmp.shape = dist_tmp.shape[1:] return random_sample( dist_tmp.random, point=trace, size=samples, param=param, shape=tuple(dist_tmp.shape), ) else: # has a distribution, but no observations distshape = tuple(param.distribution.shape) return random_sample( meth=param.distribution.random, param=param, point=trace, size=samples, shape=distshape, ) # NOTE: I think the following is already vectorized. else: if givens: variables, values = list(zip(*givens)) else: variables = values = [] # We only truly care if the ancestors of param that were given # value have the matching dshape and val.shape param_ancestors = set( theano.gof.graph.ancestors([param], blockers=list(variables))) inputs = [(var, val) for var, val in zip(variables, values) if var in param_ancestors] if inputs: input_vars, input_vals = list(zip(*inputs)) else: input_vars = [] input_vals = [] func = _compile_theano_function(param, input_vars) if not input_vars: assert input_vals == [ ] # AFAICT if there are now vars, there can't be vals output = func(*input_vals) if hasattr(output, "shape"): val = np.repeat(np.expand_dims(output, 0), samples, axis=0) else: val = np.full(samples, output) else: val = func(*input_vals) # np.ndarray([func(*input_vals) for inp in zip(*input_vals)]) return val raise ValueError("Unexpected type in draw_value: %s" % type(param))
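# Standalone sketch of the shape convention enforced by `random_sample` above:
# a draw of `size` samples from a variable with per-sample shape `shape` is
# expected to come back with shape (size,) + shape.
import numpy as np

def check_draw_shape(val, size, shape):
    assert val.shape == (size,) + tuple(shape), "sampling yields wrong shape"
    return val

draw = np.random.normal(size=(10, 3))
check_draw_shape(draw, size=10, shape=(3,))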
def find_MAP(start=None, vars=None, method="L-BFGS-B", return_raw=False, include_transformed=True, progressbar=True, maxeval=5000, model=None, *args, **kwargs): """Finds the local maximum a posteriori point given a model. `find_MAP` should not be used to initialize the NUTS sampler. Simply call ``pymc3.sample()`` and it will automatically initialize NUTS in a better way. Parameters ---------- start: `dict` of parameter values (Defaults to `model.initial_point`) vars: list List of variables to optimize and set to optimum (Defaults to all continuous). method: string or callable Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete variables are specified in `vars`, then `Powell` which will perform better). For instructions on use of a callable, refer to SciPy's documentation of `optimize.minimize`. return_raw: bool Whether to return the full output of scipy.optimize.minimize (Defaults to `False`) include_transformed: bool, optional defaults to True Flag for reporting automatically transformed variables in addition to original variables. progressbar: bool, optional defaults to True Whether or not to display a progress bar in the command line. maxeval: int, optional, defaults to 5000 The maximum number of times the posterior distribution is evaluated. model: Model (optional if in `with` context) *args, **kwargs Extra args passed to scipy.optimize.minimize Notes ----- Older code examples used `find_MAP` to initialize the NUTS sampler, but this is not an effective way of choosing starting values for sampling. As a result, we have greatly enhanced the initialization of NUTS and wrapped it inside ``pymc3.sample()`` and you should thus avoid this method. """ model = modelcontext(model) if vars is None: vars = model.cont_vars if not vars: raise ValueError("Model has no unobserved continuous variables.") vars = inputvars(vars) disc_vars = list(typefilter(vars, discrete_types)) allinmodel(vars, model) start = copy.deepcopy(start) if start is None: start = model.initial_point else: model.update_start_vals(start, model.initial_point) model.check_start_vals(start) start = Point(start, model=model) x0 = DictToArrayBijection.map(start) # TODO: If the mapping is fixed, we can simply create graphs for the # mapping and avoid all this bijection overhead def logp_func(x): return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars( x, x0.point_map_info)) try: # This might be needed for calls to `dlogp_func` # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars) def dlogp_func(x): return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))( RaveledVars(x, x0.point_map_info)) compute_gradient = True except (AttributeError, NotImplementedError, tg.NullTypeGradError): compute_gradient = False if disc_vars or not compute_gradient: pm._log.warning( "Warning: gradient not available." + "(E.g. vars contains discrete variables). MAP " + "estimates may not be accurate for the default " + "parameters. 
Defaulting to non-gradient minimization " + "'Powell'.") method = "Powell" if compute_gradient: cost_func = CostFuncWrapper(maxeval, progressbar, logp_func, dlogp_func) else: cost_func = CostFuncWrapper(maxeval, progressbar, logp_func) try: opt_result = minimize(cost_func, x0.data, method=method, jac=compute_gradient, *args, **kwargs) mx0 = opt_result["x"] # r -> opt_result except (KeyboardInterrupt, StopIteration) as e: mx0, opt_result = cost_func.previous_x, None if isinstance(e, StopIteration): pm._log.info(e) finally: last_v = cost_func.n_eval if progressbar: assert isinstance(cost_func.progress, ProgressBar) cost_func.progress.total = last_v cost_func.progress.update(last_v) print() mx0 = RaveledVars(mx0, x0.point_map_info) vars = get_default_varnames(model.unobserved_value_vars, include_transformed) mx = { var.name: value for var, value in zip( vars, model.fastfn(vars)(DictToArrayBijection.rmap(mx0))) } if return_raw: return mx, opt_result else: return mx
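# Example: typical use of `find_MAP` defined above on a small model; as the
# notes in the docstring stress, this should not be used to initialize NUTS.
# The data below are synthetic.
import numpy as np
import pymc3 as pm

data = np.random.default_rng(0).normal(loc=1.0, scale=2.0, size=50)
with pm.Model():
    mu = pm.Normal("mu", 0.0, 10.0)
    sigma = pm.HalfNormal("sigma", 5.0)
    pm.Normal("obs", mu=mu, sigma=sigma, observed=data)
    map_estimate = find_MAP(method="L-BFGS-B")
print(map_estimate["mu"], map_estimate["sigma"])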