Example #1
def cont_inputs(a):
    """
    Get the continuous input variables of an Aesara graph

    Parameters
    ----------
        a: Aesara variable

    Returns
    -------
        r: list of tensor variables that are continuous inputs
    """
    return typefilter(inputvars(a), continuous_types)
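
A minimal usage sketch (assuming these helpers behave as in PyMC's `aesaraf` module, i.e. they filter the graph inputs of `a` by dtype; the variables below are purely illustrative):

import aesara.tensor as at

x = at.dscalar("x")   # float64 input, i.e. continuous
n = at.iscalar("n")   # int32 input, i.e. discrete
y = x ** 2 + n

# Only the float-typed input should come back.
print(cont_inputs(y))   # expected: [x]
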
def find_MAP(start=None,
             vars=None,
             fmin=None,
             return_raw=False,
             disp=False,
             model=None,
             *args,
             **kwargs):
    """

    NOTE: contains a local hack that skips the gradient calculation entirely,
    because computing it causes memory overflows on large datasets
    (roughly 100000 x 15).

    Finds the local maximum a posteriori point given a model.
    The current default optimizer, `fmin_bfgs`, does not deal well with
    optimizing close to sharp edges, especially if they are the minimum.

    Parameters
    ----------
    start : `dict` of parameter values (Defaults to `model.test_point`)
    vars : list
        List of variables to set to MAP point (Defaults to all continuous).
    fmin : function
        Optimization algorithm (Defaults to `scipy.optimize.fmin_bfgs` unless
        discrete variables are specified in `vars`, in which case
        `scipy.optimize.fmin_powell` performs better).
    return_raw : bool
        Whether to also return the raw output of `fmin` (Defaults to
        `False`)
    disp : bool
        Display helpful warnings and the verbose output of `fmin` (Defaults to
        `False`)
    model : Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to fmin
    """
    model = modelcontext(model)
    if start is None:
        start = model.test_point

    if vars is None:
        vars = model.cont_vars

    vars = inputvars(vars)

    disc_vars = list(typefilter(vars, discrete_types))

    if disc_vars and disp:
        print("Warning: vars contains discrete variables. MAP " +
              "estimates may not be accurate for the default " +
              "parameters. Defaulting to non-gradient minimization " +
              "fmin_powell.")

    if fmin is None:
        if disc_vars:
            fmin = optimize.fmin_powell
        else:
            fmin = optimize.fmin_bfgs

    allinmodel(vars, model)

    start = Point(start, model=model)
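    # `bij` maps between dict-style points and a single flat parameter array,
    # which is what scipy's fmin-style optimizers expect.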
    bij = DictToArrayBijection(ArrayOrdering(vars), start)

    logp = bij.mapf(model.fastlogp)

    ## dlogp = bij.mapf(model.fastdlogp(vars))

    def logp_o(point):
        return nan_to_high(-logp(point))

    def grad_logp_o(point):
        # Unused under the local hack: `dlogp` above is commented out, so this
        # would raise a NameError if it were ever called.
        return nan_to_num(-dlogp(point))

    # Check to see if minimization function actually uses the gradient
    ## if 'fprime' in getargspec(fmin).args:
    ##     r = fmin(logp_o, bij.map(
    ##         start), fprime=grad_logp_o, disp=disp, *args, **kwargs)
    ## else:
    r = fmin(logp_o, bij.map(start), disp=disp, *args, **kwargs)

    if isinstance(r, tuple):
        mx0 = r[0]
    else:
        mx0 = r

    mx = bij.rmap(mx0)

    if (not allfinite(mx0) or not allfinite(model.logp(mx))  ## or
            ## not allfinite(model.dlogp()(mx))
        ):

        messages = []
        for var in vars:

            vals = {
                "value": mx[var.name],
                "logp": var.logp(mx)  ## ,
                ## "dlogp"   : var.dlogp()(mx)
            }

            def message(name, values):
                if np.size(values) < 10:
                    return name + " bad: " + str(values)
                else:
                    idx = np.nonzero(logical_not(isfinite(values)))
                    return name + " bad at idx: " + str(
                        idx) + " with values: " + str(values[idx])

            messages += [
                message(var.name + "." + k, v) for k, v in vals.items()
                if not allfinite(v)
            ]

        specific_errors = '\n'.join(messages)
        raise ValueError("Optimization error: max, logp " +  ## or dlogp at " +
                         "max have non-finite values. Some values may be " +
                         "outside of distribution support. max: " + repr(mx) +
                         " logp: " + repr(model.logp(mx)) +
                         ## " dlogp: " + repr(model.dlogp()(mx)) +
                         "Check that " +
                         "1) you don't have hierarchical parameters, " +
                         "these will lead to points with infinite " +
                         "density. 2) your distribution logp's are " +
                         "properly specified. Specific issues: \n" +
                         specific_errors)

    mx = {v.name: mx[v.name].astype(v.dtype) for v in model.vars}

    if return_raw:
        return mx, r
    else:
        return mx
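
For orientation, a usage sketch of this legacy variant (a hedged example assuming a PyMC3-era model; the data and model are illustrative, and `find_MAP` refers to the function defined above):

import numpy as np
import pymc3 as pm

data = np.random.randn(100) + 1.0

with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sd=10.0)
    pm.Normal("obs", mu=mu, sd=1.0, observed=data)

    # No analytic gradient is wired up here (the dlogp machinery is commented
    # out above), so fmin_bfgs falls back to numerical gradients.
    map_estimate = find_MAP(disp=True)

print(map_estimate["mu"])
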
Example #3
def find_MAP(start=None,
             vars=None,
             method="L-BFGS-B",
             return_raw=False,
             include_transformed=True,
             progressbar=True,
             maxeval=5000,
             model=None,
             *args,
             seed: Optional[int] = None,
             **kwargs):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc.sample()`` and it will automatically initialize NUTS in a better
    way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless
        discrete variables are specified in `vars`, then
        `Powell` which will perform better).  For instructions on use of a callable,
        refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional, defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional, defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler,
    but this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside ``pymc.sample()``, so you should avoid this method for
    initialization.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)
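    # Build a seeded initial-point function; user-supplied `start` values act
    # as overrides and the returned point lives in the transformed space.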
    ipfn = make_initial_point_fn(
        model=model,
        jitter_rvs={},
        return_transformed=True,
        overrides=start,
    )
    if seed is None:
        seed = model.rng_seeder.randint(2**30, dtype=np.int64)
    start = ipfn(seed)
    model.check_start_vals(start)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars(
            x, x0.point_map_info))

    try:
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
                RaveledVars(x, x0.point_map_info))

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available." +
            "(E.g. vars contains discrete variables). MAP " +
            "estimates may not be accurate for the default " +
            "parameters. Defaulting to non-gradient minimization " +
            "'Powell'.")
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                    dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(cost_func,
                              x0.data,
                              method=method,
                              jac=compute_gradient,
                              *args,
                              **kwargs)
        mx0 = opt_result["x"]  # r -> opt_result
    except (KeyboardInterrupt, StopIteration) as e:
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print(file=sys.stdout)

    # Re-wrap the flat optimum so it can be mapped back to named variables.
    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars,
                                include_transformed)
    mx = {
        var.name: value
        for var, value in zip(
            vars,
            model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
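
A corresponding usage sketch for the newer API in Example #3 (again illustrative; it assumes `pm.find_MAP` is the function above as shipped in recent PyMC releases):

import numpy as np
import pymc as pm

rng = np.random.default_rng(0)
data = rng.normal(loc=1.0, scale=2.0, size=200)

with pm.Model():
    mu = pm.Normal("mu", 0.0, 10.0)
    sigma = pm.HalfNormal("sigma", 5.0)
    pm.Normal("obs", mu=mu, sigma=sigma, observed=data)

    # Gradient-based L-BFGS-B by default; `return_raw=True` also yields the
    # underlying scipy.optimize.OptimizeResult.
    map_estimate, opt_result = pm.find_MAP(return_raw=True, maxeval=2000)

print(map_estimate["mu"], map_estimate["sigma"])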