def cont_inputs(a):
    """
    Get the continuous inputs into Aesara variables

    Parameters
    ----------
    a: Aesara variable

    Returns
    -------
    r: list of tensor variables that are continuous inputs
    """
    return typefilter(inputvars(a), continuous_types)
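
# --- Illustrative usage sketch (not part of the original module) ---
# A minimal, hedged example of `cont_inputs`: it keeps only the float-typed
# inputs of a graph. The tensor names below are assumptions for illustration.
def _cont_inputs_example():
    import aesara.tensor as at

    x = at.dscalar("x")  # continuous (float64) input
    k = at.lscalar("k")  # discrete (int64) input
    y = x * k
    # Expected to return [x] only, since k has an integer dtype.
    return cont_inputs(y)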
def find_MAP(start=None, vars=None, fmin=None,
             return_raw=False, disp=False, model=None,
             *args, **kwargs):
    """
    CONTAINS LOCAL HACK TO AVOID CALCULATING THE GRADIENT WHEN WE DON'T NEED
    IT. DOING SO CAUSES MEMORY OVERFLOWS WHEN USING A LARGE DATASET
    (100000 x 15ish).

    Sets state to the local maximum a posteriori point given a model.
    The current default of fmin_bfgs does not deal well with optimizing close
    to sharp edges, especially if they are the minimum.

    Parameters
    ----------
    start : `dict` of parameter values (Defaults to `model.test_point`)
    vars : list
        List of variables to set to MAP point (Defaults to all continuous).
    fmin : function
        Optimization algorithm (Defaults to `scipy.optimize.fmin_bfgs` unless
        discrete variables are specified in `vars`, then
        `scipy.optimize.fmin_powell` which will perform better).
    return_raw : bool
        Whether to return the extra values returned by fmin
        (Defaults to `False`)
    disp : bool
        Display helpful warnings, and verbose output of `fmin`
        (Defaults to `False`)
    model : Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to fmin
    """
    model = modelcontext(model)
    if start is None:
        start = model.test_point

    if vars is None:
        vars = model.cont_vars

    vars = inputvars(vars)

    disc_vars = list(typefilter(vars, discrete_types))

    if disc_vars and disp:
        print("Warning: vars contains discrete variables. MAP "
              "estimates may not be accurate for the default "
              "parameters. Defaulting to non-gradient minimization "
              "fmin_powell.")

    if fmin is None:
        if disc_vars:
            fmin = optimize.fmin_powell
        else:
            fmin = optimize.fmin_bfgs

    allinmodel(vars, model)

    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)

    logp = bij.mapf(model.fastlogp)
    # LOCAL HACK: the gradient function is never compiled; doing so caused
    # memory overflows on large datasets (see docstring above).
    ## dlogp = bij.mapf(model.fastdlogp(vars))

    def logp_o(point):
        return nan_to_high(-logp(point))

    def grad_logp_o(point):
        # Unused while the gradient is disabled by the local hack above.
        return nan_to_num(-dlogp(point))

    # Check to see if minimization function actually uses the gradient
    # (disabled as part of the local hack above).
    ## if 'fprime' in getargspec(fmin).args:
    ##     r = fmin(logp_o, bij.map(start),
    ##              fprime=grad_logp_o, disp=disp, *args, **kwargs)
    ## else:
    r = fmin(logp_o, bij.map(start), disp=disp, *args, **kwargs)

    if isinstance(r, tuple):
        mx0 = r[0]
    else:
        mx0 = r

    mx = bij.rmap(mx0)

    if (not allfinite(mx0) or
            not allfinite(model.logp(mx))
            ## or not allfinite(model.dlogp()(mx))
            ):
        messages = []
        for var in vars:
            vals = {
                "value": mx[var.name],
                "logp": var.logp(mx),
                ## "dlogp": var.dlogp()(mx),
            }

            def message(name, values):
                if np.size(values) < 10:
                    return name + " bad: " + str(values)
                else:
                    idx = np.nonzero(logical_not(isfinite(values)))
                    return (name + " bad at idx: " + str(idx) +
                            " with values: " + str(values[idx]))

            messages += [
                message(var.name + "." + k, v)
                for k, v in vals.items()
                if not allfinite(v)
            ]

        specific_errors = "\n".join(messages)
        raise ValueError(
            "Optimization error: max, logp at max have non-finite values. "
            "Some values may be outside of distribution support. "
            "max: " + repr(mx) + " logp: " + repr(model.logp(mx)) + " "
            ## " dlogp: " + repr(model.dlogp()(mx)) +
            "Check that "
            "1) you don't have hierarchical parameters, "
            "these will lead to points with infinite "
            "density. 2) your distribution logp's are "
            "properly specified. Specific issues: \n" + specific_errors)

    mx = {v.name: mx[v.name].astype(v.dtype) for v in model.vars}

    if return_raw:
        return mx, r
    else:
        return mx
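
# --- Illustrative usage sketch (not part of the original module) ---
# A hedged example of calling the legacy `find_MAP` defined directly above,
# which expects a scipy.optimize legacy minimizer via `fmin`. The model, data,
# and distribution keyword names are assumptions for illustration and may
# differ across PyMC3-era releases; it assumes the legacy definition above is
# the `find_MAP` in scope. `fmin_powell` avoids the gradient entirely, in line
# with the "local hack" described in its docstring.
def _find_map_legacy_example():
    import numpy as np
    import pymc3 as pm
    from scipy import optimize

    data = np.random.normal(loc=1.0, scale=2.0, size=500)
    with pm.Model():
        mu = pm.Normal("mu", mu=0, sd=10)
        pm.Normal("obs", mu=mu, sd=2, observed=data)
        map_estimate = find_MAP(fmin=optimize.fmin_powell, disp=True)
    return map_estimate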
def find_MAP(
    start=None,
    vars=None,
    method="L-BFGS-B",
    return_raw=False,
    include_transformed=True,
    progressbar=True,
    maxeval=5000,
    model=None,
    *args,
    seed: Optional[int] = None,
    **kwargs,
):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc.sample()`` and it will automatically initialize NUTS in a better
    way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all
        continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete
        variables are specified in `vars`, then `Powell` which will perform
        better). For instructions on use of a callable, refer to SciPy's
        documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize
        (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler, but
    this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside ``pymc.sample()`` and you should thus avoid this method.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    ipfn = make_initial_point_fn(
        model=model,
        jitter_rvs={},
        return_transformed=True,
        overrides=start,
    )
    if seed is None:
        seed = model.rng_seeder.randint(2**30, dtype=np.int64)
    start = ipfn(seed)
    model.check_start_vals(start)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(
            RaveledVars(x, x0.point_map_info)
        )

    try:
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
                RaveledVars(x, x0.point_map_info)
            )

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available. "
            "(E.g. vars contains discrete variables.) MAP "
            "estimates may not be accurate for the default "
            "parameters. Defaulting to non-gradient minimization "
            "'Powell'."
        )
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func, dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(
            cost_func, x0.data, method=method, jac=compute_gradient, *args, **kwargs
        )
        mx0 = opt_result["x"]  # r -> opt_result
    except (KeyboardInterrupt, StopIteration) as e:
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print(file=sys.stdout)

    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(
            vars, model.fastfn(vars)(DictToArrayBijection.rmap(mx0))
        )
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
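
# --- Illustrative usage sketch (not part of the original module) ---
# A hedged example of calling the `find_MAP` defined directly above (the
# PyMC >= 4 style wrapper around scipy.optimize.minimize). The model and data
# below are assumptions made for illustration only.
def _find_map_example():
    import numpy as np
    import pymc as pm

    data = np.random.normal(loc=1.0, scale=2.0, size=500)
    with pm.Model():
        mu = pm.Normal("mu", mu=0, sigma=10)
        sigma = pm.HalfNormal("sigma", sigma=5)
        pm.Normal("obs", mu=mu, sigma=sigma, observed=data)
        # return_raw=True also returns the scipy OptimizeResult.
        map_estimate, opt_result = find_MAP(method="L-BFGS-B", return_raw=True)
    return map_estimate, opt_result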