def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
    """Build the shared ``histogram`` parameter holding the particles.

    The particles come either from an existing ``trace`` or, when no trace
    is given, from ``size`` noisy copies of a starting point.

    Parameters
    ----------
    trace : optional
        Object exposing ``chains``, ``point(idx, chain)`` and ``len()``.
        Every draw of every chain becomes one row of the histogram.
    size : int, optional
        Number of particles to create when ``trace`` is None.
    jitter : float
        Standard deviation of the Gaussian noise added to the tiled
        starting point (only used when ``trace`` is None).
    start : dict, optional
        Starting point. Keys it does not define are filled in from
        ``self.model.test_point``. Only used when ``trace`` is None.

    Returns
    -------
    dict
        ``{"histogram": shared}`` where ``shared`` is a Theano shared
        variable named ``"histogram"`` cast with ``pm.floatX``.

    Raises
    ------
    opvi.ParametrizationError
        If both ``trace`` and ``size`` are None.
    """
    if trace is None:
        if size is None:
            raise opvi.ParametrizationError(
                "Need `trace` or `size` to initialize")
        if start is None:
            start = self.model.test_point
        else:
            # `update_start_vals(a, b, model)` only adds the keys missing
            # from `a`; the user's point therefore has to be `a`, otherwise
            # every value it shares with the test point is discarded.
            # Work on a copy so the caller's dict is left untouched.
            start_ = start.copy()
            update_start_vals(start_, self.model.test_point, self.model)
            start = start_
        start = pm.floatX(self.bij.map(start))
        # Initialize particles: `size` copies of the start plus noise
        histogram = np.tile(start, (size, 1))
        histogram += pm.floatX(
            np.random.normal(0, jitter, histogram.shape))
    else:
        # One row per (chain, draw) pair, filled chain by chain
        histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = self.bij.map(trace.point(j, t))
                i += 1
    return dict(histogram=theano.shared(pm.floatX(histogram), "histogram"))
def create_shared_params(self, start=None):
    """Create the shared ``mu`` and ``rho`` parameters.

    Parameters
    ----------
    start : dict, optional
        Starting point. Keys it does not define are filled in from
        ``self.model.test_point``; when None the test point is used as is.

    Returns
    -------
    dict
        ``'mu'``: shared variable initialised with the start point mapped
        through ``self.gbij``; ``'rho'``: shared zeros of shape
        ``(self.global_size,)``. Both are cast with ``pm.floatX``.
    """
    if start is None:
        start = self.model.test_point
    else:
        # `update_start_vals(a, b, model)` only adds the keys missing from
        # `a`, so the user's point must be `a`; the reverse order silently
        # dropped every user value already present in the test point.
        start_ = start.copy()
        update_start_vals(start_, self.model.test_point, self.model)
        start = start_
    start = self.gbij.map(start)
    return {'mu': theano.shared(
                pm.floatX(start), 'mu'),
            'rho': theano.shared(
                pm.floatX(np.zeros((self.global_size,))), 'rho')}
def create_shared_params(self, start=None):
    """Create the shared ``mu`` and ``rho`` parameters.

    Parameters
    ----------
    start : dict, optional
        Starting point. Keys it does not define are filled in from
        ``self.model.test_point``; when None the test point is used as is.

    Returns
    -------
    dict
        ``'mu'``: shared variable initialised with the start point mapped
        through ``self.gbij``; ``'rho'``: shared zeros of shape
        ``(self.global_size,)``. Both are cast with ``pm.floatX``.
    """
    if start is None:
        start = self.model.test_point
    else:
        # `update_start_vals(a, b, model)` only adds the keys missing from
        # `a`, so the user's point must be `a`; the reverse order silently
        # dropped every user value already present in the test point.
        start_ = start.copy()
        update_start_vals(start_, self.model.test_point, self.model)
        start = start_
    start = self.gbij.map(start)
    return {
        'mu': theano.shared(pm.floatX(start), 'mu'),
        'rho': theano.shared(pm.floatX(np.zeros((self.global_size, ))), 'rho')
    }
def from_noise(cls, size, jitter=.01, local_rv=None, start=None,
               model=None, random_seed=None, **kwargs):
    """Initialize Histogram with random noise

    Parameters
    ----------
    size : `int`
        number of initial particles
    jitter : `float`
        initial sd
    local_rv : `dict`
        mapping {model_variable -> local_variable}
        Local Vars are used for Autoencoding Variational Bayes
        See (AEVB; Kingma and Welling, 2014) for details
    start : `Point`
        initial point; keys it does not define are taken from the
        model's test point
    model : :class:`pymc3.Model`
        PyMC3 model for inference
    random_seed : None or `int`
        leave None to use package global RandomStream or other
        valid value to create instance specific one
    kwargs : other kwargs passed to init

    Returns
    -------
    :class:`Empirical`
    """
    hist = cls(None, local_rv=local_rv, model=model,
               random_seed=random_seed, **kwargs)
    if start is None:
        start = hist.model.test_point
    else:
        # `update_start_vals(a, b, model)` only adds the keys missing from
        # `a`, so the user's point must be `a`; the reverse order silently
        # dropped every user value already present in the test point.
        start_ = start.copy()
        update_start_vals(start_, hist.model.test_point, hist.model)
        start = start_
    start = pm.floatX(hist.gbij.map(start))
    # Initialize particles: `size` copies of the start plus Gaussian noise
    x0 = np.tile(start, (size, 1))
    x0 += pm.floatX(np.random.normal(0, jitter, x0.shape))
    hist.histogram.set_value(x0)
    return hist
def create_shared_params(self, start=None):
    """Create the shared ``mu`` and ``L_tril`` parameters.

    Parameters
    ----------
    start : dict, optional
        Starting point. Keys it does not define are filled in from
        ``self.model.test_point``; when None the test point is used as is.

    Returns
    -------
    dict
        ``'mu'``: shared variable holding the start point mapped through
        ``self.gbij`` and cast with ``pm.floatX``; ``'L_tril'``: shared
        vector with the lower-triangular entries of an identity matrix of
        size ``self.global_size``.
    """
    if start is None:
        start = self.model.test_point
    else:
        # `update_start_vals(a, b, model)` only adds the keys missing from
        # `a`, so the user's point must be `a`; the reverse order silently
        # dropped every user value already present in the test point.
        start_ = start.copy()
        update_start_vals(start_, self.model.test_point, self.model)
        start = start_
    start = pm.floatX(self.gbij.map(start))
    n = self.global_size
    # Lower triangle of the identity, flattened in `np.tril_indices` order
    L_tril = (np.eye(n)[np.tril_indices(n)].astype(theano.config.floatX))
    return {
        'mu': theano.shared(start, 'mu'),
        'L_tril': theano.shared(L_tril, 'L_tril')
    }
def create_shared_params(self, start=None):
    """Create the shared ``mu`` and ``L_tril`` parameters.

    Parameters
    ----------
    start : dict, optional
        Starting point. Keys it does not define are filled in from
        ``self.model.test_point``; when None the test point is used as is.

    Returns
    -------
    dict
        ``'mu'``: shared variable holding the start point mapped through
        ``self.gbij`` and cast with ``pm.floatX``; ``'L_tril'``: shared
        vector with the lower-triangular entries of an identity matrix of
        size ``self.global_size``.
    """
    if start is None:
        start = self.model.test_point
    else:
        # `update_start_vals(a, b, model)` only adds the keys missing from
        # `a`, so the user's point must be `a`; the reverse order silently
        # dropped every user value already present in the test point.
        start_ = start.copy()
        update_start_vals(start_, self.model.test_point, self.model)
        start = start_
    start = pm.floatX(self.gbij.map(start))
    n = self.global_size
    # Lower triangle of the identity, flattened in `np.tril_indices` order
    L_tril = (
        np.eye(n)
        [np.tril_indices(n)]
        .astype(theano.config.floatX)
    )
    return {'mu': theano.shared(start, 'mu'),
            'L_tril': theano.shared(L_tril, 'L_tril')}
def create_shared_params(self, start=None):
    """Create the shared ``mu`` and ``L_tril`` parameters.

    Parameters
    ----------
    start : dict, optional
        Starting point. It is copied and the keys it does not define are
        filled in from ``self.model.test_point``; when None the test point
        is used as is.

    Returns
    -------
    dict
        ``"mu"``: shared variable with the start values in
        ``theano.config.floatX``; ``"L_tril"``: shared variable with the
        lower-triangular entries of an identity matrix of size
        ``self.ddim``. When ``self.batched`` is true both arrays are tiled
        ``self.bdim`` times along a new leading axis.
    """
    if start is None:
        start = self.model.test_point
    else:
        start_ = start.copy()
        update_start_vals(start_, self.model.test_point, self.model)
        start = start_
    if self.batched:
        # Batched groups take the first entry of the group's single variable
        start = start[self.group[0].name][0]
    else:
        start = self.bij.map(start)
    # Cast to floatX like `L_tril` below: a float64 start value would
    # otherwise produce a float64 shared `mu` under floatX=float32.
    start = np.asarray(start, dtype=theano.config.floatX)
    n = self.ddim
    # Lower triangle of the identity, flattened in `np.tril_indices` order
    L_tril = np.eye(n)[np.tril_indices(n)].astype(theano.config.floatX)
    if self.batched:
        start = np.tile(start, (self.bdim, 1))
        L_tril = np.tile(L_tril, (self.bdim, 1))
    return {"mu": theano.shared(start, "mu"),
            "L_tril": theano.shared(L_tril, "L_tril")}
def from_noise(cls, size, jitter=.01, local_rv=None, start=None,
               model=None, random_seed=None, **kwargs):
    """Initialize Histogram with random noise

    Parameters
    ----------
    size : `int`
        number of initial particles
    jitter : `float`
        initial sd
    local_rv : `dict`
        mapping {model_variable -> local_variable}
        Local Vars are used for Autoencoding Variational Bayes
        See (AEVB; Kingma and Welling, 2014) for details
    start : `Point`
        initial point; keys it does not define are taken from the
        model's test point
    model : :class:`pymc3.Model`
        PyMC3 model for inference
    random_seed : None or `int`
        leave None to use package global RandomStream or other
        valid value to create instance specific one
    kwargs : other kwargs passed to init

    Returns
    -------
    :class:`Empirical`
    """
    hist = cls(
        None,
        local_rv=local_rv,
        model=model,
        random_seed=random_seed,
        **kwargs)
    if start is None:
        start = hist.model.test_point
    else:
        # `update_start_vals(a, b, model)` only adds the keys missing from
        # `a`, so the user's point must be `a`; the reverse order silently
        # dropped every user value already present in the test point.
        start_ = start.copy()
        update_start_vals(start_, hist.model.test_point, hist.model)
        start = start_
    start = pm.floatX(hist.gbij.map(start))
    # Initialize particles: `size` copies of the start plus Gaussian noise
    x0 = np.tile(start, (size, 1))
    x0 += pm.floatX(np.random.normal(0, jitter, x0.shape))
    hist.histogram.set_value(x0)
    return hist
def create_shared_params(self, start=None):
    """Create the shared ``mu`` and ``rho`` parameters.

    Parameters
    ----------
    start : dict, optional
        Starting point. It is copied and passed with the model's test
        point to ``update_start_vals``; when None the test point is used.

    Returns
    -------
    dict
        ``'mu'`` and ``'rho'`` shared variables, both cast with
        ``pm.floatX``. ``rho`` starts as zeros of shape ``(self.ddim,)``.
        When ``self.batched`` is true both arrays are tiled ``self.bdim``
        times along a new leading axis.
    """
    if start is not None:
        point = start.copy()
        update_start_vals(point, self.model.test_point, self.model)
    else:
        point = self.model.test_point

    if self.batched:
        # Batched groups take the first entry of the group's single variable
        mu = point[self.group[0].name][0]
    else:
        mu = self.bij.map(point)
    rho = np.zeros((self.ddim,))

    if self.batched:
        reps = (self.bdim, 1)
        mu = np.tile(mu, reps)
        rho = np.tile(rho, reps)

    return {
        'mu': theano.shared(pm.floatX(mu), 'mu'),
        'rho': theano.shared(pm.floatX(rho), 'rho'),
    }
def create_shared_params(self, start=None):
    """Create the shared ``mu`` and ``L_tril`` parameters.

    Parameters
    ----------
    start : dict, optional
        Starting point. It is copied and the keys it does not define are
        filled in from ``self.model.test_point``; when None the test point
        is used as is.

    Returns
    -------
    dict
        ``'mu'``: shared variable with the start values in
        ``theano.config.floatX``; ``'L_tril'``: shared variable with the
        lower-triangular entries of an identity matrix of size
        ``self.ddim``. When ``self.batched`` is true both arrays are tiled
        ``self.bdim`` times along a new leading axis.
    """
    if start is None:
        start = self.model.test_point
    else:
        start_ = start.copy()
        update_start_vals(start_, self.model.test_point, self.model)
        start = start_
    if self.batched:
        # Batched groups take the first entry of the group's single variable
        start = start[self.group[0].name][0]
    else:
        start = self.bij.map(start)
    # Cast to floatX like `L_tril` below: a float64 start value would
    # otherwise produce a float64 shared `mu` under floatX=float32.
    start = np.asarray(start, dtype=theano.config.floatX)
    n = self.ddim
    # Lower triangle of the identity, flattened in `np.tril_indices` order
    L_tril = (
        np.eye(n)
        [np.tril_indices(n)]
        .astype(theano.config.floatX)
    )
    if self.batched:
        start = np.tile(start, (self.bdim, 1))
        L_tril = np.tile(L_tril, (self.bdim, 1))
    return {'mu': theano.shared(start, 'mu'),
            'L_tril': theano.shared(L_tril, 'L_tril')}
def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
    """Build the shared ``histogram`` parameter holding the particles.

    The particles come either from an existing ``trace`` or, when no trace
    is given, from ``size`` noisy copies of a starting point.

    Parameters
    ----------
    trace : optional
        Object exposing ``chains``, ``point(idx, chain)`` and ``len()``.
        Every draw of every chain becomes one row of the histogram.
    size : int, optional
        Number of particles to create when ``trace`` is None.
    jitter : float
        Standard deviation of the Gaussian noise added to the tiled
        starting point (only used when ``trace`` is None).
    start : dict, optional
        Starting point. Keys it does not define are filled in from
        ``self.model.test_point``. Only used when ``trace`` is None.

    Returns
    -------
    dict
        ``{'histogram': shared}`` where ``shared`` is a Theano shared
        variable named ``'histogram'`` cast with ``pm.floatX``.

    Raises
    ------
    opvi.ParametrizationError
        If both ``trace`` and ``size`` are None.
    """
    if trace is None:
        if size is None:
            raise opvi.ParametrizationError('Need `trace` or `size` to initialize')
        if start is None:
            start = self.model.test_point
        else:
            # `update_start_vals(a, b, model)` only adds the keys missing
            # from `a`; the user's point therefore has to be `a`, otherwise
            # every value it shares with the test point is discarded.
            # Work on a copy so the caller's dict is left untouched.
            start_ = start.copy()
            update_start_vals(start_, self.model.test_point, self.model)
            start = start_
        start = pm.floatX(self.bij.map(start))
        # Initialize particles: `size` copies of the start plus noise
        histogram = np.tile(start, (size, 1))
        histogram += pm.floatX(np.random.normal(0, jitter, histogram.shape))
    else:
        # One row per (chain, draw) pair, filled chain by chain
        histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
        i = 0
        for t in trace.chains:
            for j in range(len(trace)):
                histogram[i] = self.bij.map(trace.point(j, t))
                i += 1
    return dict(histogram=theano.shared(pm.floatX(histogram), 'histogram'))
def __init__(self, start=None, vars=None, model=None):
    """Set up the bijection and the compiled objective for a model.

    Parameters
    ----------
    start : dict, optional
        Starting coordinates; passed with the model's test point to
        ``update_start_vals``. Defaults to ``model.test_point``.
    vars : list, optional
        Variables to fit. Defaults to ``model.cont_vars``.
    model : optional
        Model to use; resolved through ``pm.modelcontext``.

    Stores ``model``, ``start``, ``vars``, ``bij`` and ``func`` on the
    instance. ``func`` evaluates the negative ``model.logpt`` followed by
    its gradients with respect to ``vars``.
    """
    resolved = pm.modelcontext(model)
    self.model = resolved

    # Work out the full starting coordinates
    if start is not None:
        update_start_vals(start, resolved.test_point, resolved)
    else:
        start = resolved.test_point
    self.start = start

    # Fit all the parameters by default
    fit_vars = resolved.cont_vars if vars is None else vars
    fit_vars = inputvars(fit_vars)
    self.vars = fit_vars
    allinmodel(fit_vars, resolved)

    # Work out the relevant bijection map
    start_point = Point(start, model=resolved)
    self.bij = DictToArrayBijection(ArrayOrdering(fit_vars), start_point)

    # Pre-compile the theano model and gradient
    neg_logp = -resolved.logpt
    gradients = theano.grad(neg_logp, fit_vars, disconnected_inputs="ignore")
    self.func = get_theano_function_for_var(
        [neg_logp] + gradients, model=resolved)
def optimize(start=None, vars=None, model=None, return_info=False,
             verbose=True, **kwargs):
    """Maximize the log prob of a PyMC3 model using scipy

    All extra arguments are passed directly to the
    ``scipy.optimize.minimize`` function. ``jac=True`` is always set
    because the objective returns the value and its gradient together.

    Args:
        start: The PyMC3 coordinate dictionary of the starting position
            (defaults to ``model.test_point``)
        vars: The variables to optimize (defaults to ``model.cont_vars``)
        model: The PyMC3 model
        return_info: Return both the coordinate dictionary and the result
            of ``scipy.optimize.minimize``
        verbose: Print the success flag and log probability to the screen

    Returns:
        The coordinate dictionary of the accepted point, or the tuple
        ``(point, info)`` when ``return_info`` is true. The optimizer's
        result is only accepted when its objective is finite and lower
        than the initial one; otherwise the starting point is returned.

    """
    from scipy.optimize import minimize

    model = pm.modelcontext(model)

    # Work out the full starting coordinates
    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)

    # Fit all the parameters by default
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    allinmodel(vars, model)

    # Work out the relevant bijection map
    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)

    # Pre-compile the theano model and gradient
    nlp = -model.logpt
    grad = theano.grad(nlp, vars, disconnected_inputs="ignore")
    func = get_theano_function_for_var([nlp] + grad, model=model)

    if verbose:
        # Report the untransformed names of the variables being optimized
        names = [
            get_untransformed_name(v.name)
            if is_transformed_name(v.name) else v.name for v in vars
        ]
        sys.stderr.write("optimizing logp for variables: [{0}]\n".format(
            ", ".join(names)))
        bar = tqdm.tqdm()

    # This returns the objective function and its derivatives
    def objective(vec):
        res = func(*get_args_for_theano_function(bij.rmap(vec), model=model))
        # res[0] is the negative log prob; res[1:] are the per-variable
        # gradients, flattened back into a single vector by the bijection
        d = dict(zip((v.name for v in vars), res[1:]))
        g = bij.map(d)
        if verbose:
            bar.set_postfix(logp="{0:e}".format(-res[0]))
            bar.update()
        return res[0], g

    # Optimize using scipy.optimize
    x0 = bij.map(start)
    initial = objective(x0)[0]
    kwargs["jac"] = True
    info = minimize(objective, x0, **kwargs)

    # Only accept the output if it is better than it was
    x = info.x if (np.isfinite(info.fun) and info.fun < initial) else x0

    # Coerce the output into the right format
    # (`vars` is rebound here; `objective` is not called past this point)
    vars = get_default_varnames(model.unobserved_RVs, True)
    point = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(bij.rmap(x)))
    }

    if verbose:
        bar.close()
        sys.stderr.write("message: {0}\n".format(info.message))
        sys.stderr.write("logp: {0} -> {1}\n".format(-initial, -info.fun))
        # NOTE(review): the non-finite warning sits under `verbose`, so it
        # is not logged for silent runs -- confirm this nesting is intended
        if not np.isfinite(info.fun):
            logger.warning("final logp not finite, returning initial point")
            logger.warning(
                "this suggests that something is wrong with the model")
            logger.debug("{0}".format(info))

    if return_info:
        return point, info
    return point
def find_MAP(start=None, vars=None, method="L-BFGS-B", return_raw=False,
             include_transformed=True, progressbar=True, maxeval=5000,
             model=None, *args, **kwargs):
    """
    Finds the local maximum a posteriori point given a model.

    find_MAP should not be used to initialize the NUTS sampler. Simply call
    pymc3.sample() and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.test_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all
        continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete
        variables are specified in `vars`, then `Powell` which will perform
        better). For instructions on use of a callable, refer to SciPy's
        documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize
        (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize. A legacy ``fmin``
        keyword is still accepted (with a warning) and is then called
        instead of ``minimize``.

    Returns
    -------
    dict
        Variable name -> value at the point found. When `return_raw` is
        true a tuple ``(dict, opt_result)`` is returned instead;
        ``opt_result`` is None if the optimization was interrupted.

    Notes
    -----
    Older code examples used find_MAP() to initialize the NUTS sampler,
    but this is not an effective way of choosing starting values for
    sampling. As a result, we have greatly enhanced the initialization of
    NUTS and wrapped it inside pymc3.sample() and you should thus avoid
    this method.
    """
    model = modelcontext(model)
    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)

    check_start_vals(start, model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    # Map between the point dictionary and the flat array the optimizer uses
    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)
    logp_func = bij.mapf(model.fastlogp_nojac)
    x0 = bij.map(start)

    # Try to build the gradient function; fall back to gradient-free
    # optimization when it is not available
    try:
        dlogp_func = bij.mapf(model.fastdlogp_nojac(vars))
        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available." +
            "(E.g. vars contains discrete variables). MAP " +
            "estimates may not be accurate for the default " +
            "parameters. Defaulting to non-gradient minimization " +
            "'Powell'.")
        method = "Powell"

    if "fmin" in kwargs:
        # Legacy path: the caller supplied an optimizer callable directly
        fmin = kwargs.pop("fmin")
        warnings.warn(
            "In future versions, set the optimization algorithm with a string. "
            'For example, use `method="L-BFGS-B"` instead of '
            '`fmin=sp.optimize.fmin_l_bfgs_b"`.')

        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        # Check to see if minimization function actually uses the gradient
        # NOTE(review): `dlogp_func` is only bound when the `try` above
        # succeeded; an `fmin` taking `fprime` would hit a NameError here
        # when the gradient is unavailable -- confirm
        if "fprime" in getargspec(fmin).args:

            def grad_logp(point):
                return nan_to_num(-dlogp_func(point))

            opt_result = fmin(cost_func, x0, fprime=grad_logp, *args, **kwargs)
        else:
            # Check to see if minimization function uses a starting value
            if "x0" in getargspec(fmin).args:
                opt_result = fmin(cost_func, x0, *args, **kwargs)
            else:
                opt_result = fmin(cost_func, *args, **kwargs)

        # The result may be a tuple whose first item is the optimum
        if isinstance(opt_result, tuple):
            mx0 = opt_result[0]
        else:
            mx0 = opt_result
    else:
        # remove 'if' part, keep just this 'else' block after version change
        if compute_gradient:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                        dlogp_func)
        else:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        try:
            opt_result = minimize(cost_func, x0, method=method,
                                  jac=compute_gradient, *args, **kwargs)
            mx0 = opt_result["x"]  # r -> opt_result
        except (KeyboardInterrupt, StopIteration) as e:
            # Interrupted: fall back to the wrapper's `previous_x`
            mx0, opt_result = cost_func.previous_x, None
            if isinstance(e, StopIteration):
                pm._log.info(e)
        finally:
            # Bring the progress bar to its final evaluation count
            last_v = cost_func.n_eval
            if progressbar:
                assert isinstance(cost_func.progress, ProgressBar)
                cost_func.progress.total = last_v
                cost_func.progress.update(last_v)
                print()

    # Evaluate the reported variables at the optimum
    vars = get_default_varnames(model.unobserved_RVs, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx