Example #1
    def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
                 model=None, blocked=True, potential=None,
                 integrator="leapfrog", dtype=None, **theano_kwargs):
        """Set up Hamiltonian samplers with common structures.

        Parameters
        ----------
        vars : list of theano variables
        scaling : array_like, ndim = {1,2}
            Scaling for momentum distribution. 1d arrays interpreted matrix
            diagonal.
        step_scale : float, default=0.25
            Size of steps to take, automatically scaled down by 1/n**(1/4)
        is_cov : bool, default=False
            Treat scaling as a covariance matrix/vector if True, else treat
            it as a precision matrix/vector
        model : pymc3 Model instance
        blocked: bool, default=True
        potential : Potential, optional
            An object that represents the Hamiltonian, with `velocity`,
            `energy`, and `random` methods.
        **theano_kwargs: passed to theano functions
        """
        model = modelcontext(model)

        if vars is None:
            vars = model.cont_vars
        vars = inputvars(vars)

        super(BaseHMC, self).__init__(vars, blocked=blocked, model=model,
                                      dtype=dtype, **theano_kwargs)

        size = self._logp_dlogp_func.size

        if scaling is None and potential is None:
            mean = floatX(np.zeros(size))
            var = floatX(np.ones(size))
            potential = QuadPotentialDiagAdapt(size, mean, var, 10)

        if isinstance(scaling, dict):
            point = Point(scaling, model=model)
            scaling = guess_scaling(point, model=model, vars=vars)

        if scaling is not None and potential is not None:
            raise ValueError("Can not specify both potential and scaling.")

        self.step_size = step_scale / (size ** 0.25)
        if potential is not None:
            self.potential = potential
        else:
            self.potential = quad_potential(scaling, is_cov)

        self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func)
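For context, a hedged usage sketch: in PyMC3 the public step methods pm.HMC and pm.NUTS subclass BaseHMC and forward these keyword arguments, so `scaling`, `is_cov`, and `step_scale` can be passed through them (the toy model below is illustrative only).

import numpy as np
import pymc3 as pm

with pm.Model():
    x = pm.Normal("x", 0.0, 1.0, shape=5)
    # A 1-d scaling array is interpreted as a precision vector because is_cov=False.
    step = pm.HMC(scaling=np.ones(5), is_cov=False, step_scale=0.25)
    trace = pm.sample(500, tune=500, step=step)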
Example #2
    def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
                 model=None, blocked=True, use_single_leapfrog=False,
                 potential=None, integrator="leapfrog", **theano_kwargs):
        """Superclass to implement Hamiltonian/hybrid monte carlo

        Parameters
        ----------
        vars : list of theano variables
        scaling : array_like, ndim = {1,2}
            Scaling for momentum distribution. 1d arrays interpreted matrix diagonal.
        step_scale : float, default=0.25
            Size of steps to take, automatically scaled down by 1/n**(1/4)
        is_cov : bool, default=False
            Treat scaling as a covariance matrix/vector if True, else treat it as a
            precision matrix/vector
        model : pymc3 Model instance, default=Context model
        blocked : bool, default=True
        use_single_leapfrog : bool, default=False
            If True, leapfrog steps take a single step at a time.
        potential : Potential, optional
            An object that represents the Hamiltonian, with `velocity`,
            `energy`, and `random` methods.
        **theano_kwargs: passed to theano functions
        """
        model = modelcontext(model)

        if vars is None:
            vars = model.cont_vars
        vars = inputvars(vars)

        if scaling is None and potential is None:
            scaling = model.test_point

        if isinstance(scaling, dict):
            scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars)

        if scaling is not None and potential is not None:
            raise ValueError("Can not specify both potential and scaling.")

        self.step_size = step_scale / (model.ndim ** 0.25)
        if potential is not None:
            self.potential = potential
        else:
            self.potential = quad_potential(scaling, is_cov, as_cov=False)

        shared = make_shared_replacements(vars, model)
        if theano_kwargs is None:
            theano_kwargs = {}

        self.H, self.compute_energy, self.compute_velocity, self.leapfrog, self.dlogp = get_theano_hamiltonian_functions(
            vars, shared, model.logpt, self.potential, use_single_leapfrog, integrator, **theano_kwargs)

        super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
Example #3
def setup_default_model(n_planets,
                        datasets,
                        min_period=None,
                        max_period=None,
                        min_amp=None,
                        max_amp=None,
                        circular=True,
                        trend_order=0,
                        model=None):
    model = modelcontext(model)

    if not isinstance(datasets, collections.Iterable):
        datasets = [datasets]

    x, y, yerr = [], [], []
    for data in datasets:
        x.append(data.t)
        y.append(data.rv)
        if data.rverr is not None:
            yerr.append(data.rverr)
    x = np.concatenate(x)
    y = np.concatenate(y)
    if len(yerr):
        yerr = np.concatenate(yerr)
        if len(yerr) != len(x):
            yerr = None
    else:
        yerr = None

    if min_period is None:
        min_period = np.mean(np.diff(np.sort(x)))
    if max_period is None:
        max_period = 0.5 * (x.max() - x.min())

    if min_amp is None:
        if yerr is None:
            min_amp = 0.001 * np.std(y)
        else:
            min_amp = 0.01 * np.min(yerr)
    if max_amp is None:
        max_amp = 1.5 * (y.max() - y.min())

    peaks = find_peaks(n_planets,
                       x,
                       y,
                       yerr,
                       min_period=min_period,
                       max_period=max_period)

    with model:
        planets = []
        for peak, name in zip(peaks, string.ascii_lowercase[1:]):
            logP = pm.Uniform(name + ":logP",
                              lower=np.log(min_period),
                              upper=np.log(max_period),
                              testval=np.log(peak["period"]))
            logK = pm.Uniform(name + ":logK",
                              lower=np.log(min_amp),
                              upper=np.log(max_amp),
                              testval=np.log(
                                  np.clip(peak["amp"], min_amp + 1e-2,
                                          max_amp - 1e-2)))

            eccen = None
            if not circular:
                eccen = pm.Beta(name + ":eccen",
                                alpha=0.867,
                                beta=3.03,
                                testval=0.001)

            planets.append(
                RVPlanet(name, logP, logK, phi=peak["phase"], eccen=eccen))

            if len(planets) > 1:
                pm.Potential(
                    "order:{0}".format(name),
                    tt.switch((planets[-2].logK < planets[-1].logK), 0.0,
                              -np.inf))

        rvmodel = RVModel("rv", datasets, planets)
        pm.Deterministic("logp", model.logpt)

        return rvmodel
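A hedged usage sketch for setup_default_model. The `RVData` container and the simulated radial-velocity data are illustrative assumptions; any object exposing `.t`, `.rv`, and `.rverr` attributes should work, per the attribute access in the loop above.

import numpy as np
import pymc3 as pm

class RVData:
    """Minimal stand-in for a dataset with t, rv, and rverr attributes."""
    def __init__(self, t, rv, rverr=None):
        self.t, self.rv, self.rverr = t, rv, rverr

t = np.sort(np.random.uniform(0.0, 100.0, 60))
rv = 3.0 * np.sin(2.0 * np.pi * t / 12.3) + 0.5 * np.random.randn(60)
data = RVData(t, rv, rverr=0.5 * np.ones_like(t))

with pm.Model() as model:
    rvmodel = setup_default_model(n_planets=1, datasets=data, circular=True)
    trace = pm.sample(1000, tune=1000)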
Example #4
def fast_sample_posterior_predictive(
    trace: Union[MultiTrace, Dataset, InferenceData, List[Dict[str,
                                                               np.ndarray]]],
    samples: Optional[int] = None,
    model: Optional[Model] = None,
    var_names: Optional[List[str]] = None,
    keep_size: bool = False,
    random_seed=None,
) -> Dict[str, np.ndarray]:
    """Generate posterior predictive samples from a model given a trace.

    This is a vectorized alternative to the standard ``sample_posterior_predictive`` function.
    It aims to be as compatible as possible with the original API, and is significantly
    faster.  Both posterior predictive sampling functions have some remaining issues, and
    we encourage users to verify agreement across the results of both functions for the time
    being.

    Parameters
    ----------
    trace: MultiTrace, xarray.Dataset, InferenceData, or List of points (dictionary)
        Trace generated from MCMC sampling.
    samples: int, optional
        Number of posterior predictive samples to generate. Defaults to one posterior predictive
        sample per posterior sample, that is, the number of draws times the number of chains. It
        is not recommended to modify this value; when modified, some chains may not be represented
        in the posterior predictive sample.
    model: Model (optional if in `with` context)
        Model used to generate `trace`
    var_names: Iterable[str]
        List of vars to sample.
    keep_size: bool, optional
        Force posterior predictive sample to have the same shape as posterior and sample stats
        data: ``(nchains, ndraws, ...)``.
    random_seed: int
        Seed for the random number generator.

    Returns
    -------
    samples: dict
        Dictionary with the variable names as keys, and values numpy arrays containing
        posterior predictive samples.
    """

    ### Implementation note: this function primarily canonicalizes its arguments:
    ### it establishes the model context, wrangles the number of samples, and
    ### converts the trace argument into a _TraceDict object fitted to the
    ### requested number of samples.  It then invokes posterior_predictive_draw_values
    ### repeatedly: the trace is sized to match the number of samples, so if more
    ### samples are requested than the trace contains, we draw from it several times.
    ### This makes the shape issues a little easier to deal with.

    if isinstance(trace, InferenceData):
        nchains, ndraws = chains_and_samples(trace)
        trace = dataset_to_point_list(trace.posterior)
    elif isinstance(trace, Dataset):
        nchains, ndraws = chains_and_samples(trace)
        trace = dataset_to_point_list(trace)
    elif isinstance(trace, MultiTrace):
        nchains = trace.nchains
        ndraws = len(trace)
    else:
        if keep_size:
            # arguably this should be just a warning.
            raise IncorrectArgumentsError(
                "For keep_size, cannot identify chains and length from %s.",
                trace)

    model = modelcontext(model)
    assert model is not None
    with model:

        if keep_size and samples is not None:
            raise IncorrectArgumentsError(
                "Should not specify both keep_size and samples arguments")

        if isinstance(trace, list) and all(isinstance(x, dict) for x in trace):
            _trace = _TraceDict(point_list=trace)
        elif isinstance(trace, MultiTrace):
            _trace = _TraceDict(multi_trace=trace)
        else:
            raise TypeError(
                "Unable to generate posterior predictive samples from argument of type %s"
                % type(trace))

        len_trace = len(_trace)

        assert isinstance(_trace, _TraceDict)

        _samples: List[int] = []
        # temporary replacement for more complicated logic.
        max_samples: int = len_trace
        if samples is None or samples == max_samples:
            _samples = [max_samples]
        elif samples < max_samples:
            warnings.warn(
                "samples parameter is smaller than nchains times ndraws, some draws "
                "and/or chains may not be represented in the returned posterior "
                "predictive sample")
            # if this is less than the number of samples in the trace, take a slice and
            # work with that.
            _trace = _trace[slice(samples)]
            _samples = [samples]
        elif samples > max_samples:
            full, rem = divmod(samples, max_samples)
            _samples = (full * [max_samples]) + ([rem] if rem != 0 else [])
        else:
            raise IncorrectArgumentsError(
                "Unexpected combination of samples (%s) and max_samples (%d)" %
                (samples, max_samples))

        if var_names is None:
            vars = model.observed_RVs
        else:
            vars = [model[x] for x in var_names]

        if random_seed is not None:
            np.random.seed(random_seed)

        if TYPE_CHECKING:
            _ETPParent = UserDict[str,
                                  np.ndarray]  # this is only processed by mypy
        else:
            # this is not seen by mypy but will be executed at runtime.
            _ETPParent = UserDict

        class _ExtendableTrace(_ETPParent):
            def extend_trace(self, trace: Dict[str, np.ndarray]) -> None:
                for k, v in trace.items():
                    if k in self.data:
                        self.data[k] = np.concatenate((self.data[k], v))
                    else:
                        self.data[k] = v

        ppc_trace = _ExtendableTrace()
        for s in _samples:
            strace = _trace if s == len_trace else _trace[slice(0, s)]
            try:
                values = posterior_predictive_draw_values(
                    cast(List[Any], vars), strace, s)
                new_trace: Dict[str, np.ndarray] = {
                    k.name: v
                    for (k, v) in zip(vars, values)
                }
                ppc_trace.extend_trace(new_trace)
            except KeyboardInterrupt:
                pass

    if keep_size:
        return {
            k: ary.reshape((nchains, ndraws, *ary.shape[1:]))
            for k, ary in ppc_trace.items()
        }
    # this gets us a Dict[str, np.ndarray] instead of my wrapped equiv.
    return ppc_trace.data
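A minimal usage sketch for fast_sample_posterior_predictive (the toy model is illustrative):

import numpy as np
import pymc3 as pm

y_obs = np.random.randn(100)

with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 1.0)
    y = pm.Normal("y", mu, 1.0, observed=y_obs)
    trace = pm.sample(1000, tune=1000, chains=2)
    ppc = fast_sample_posterior_predictive(trace, keep_size=True)

# With keep_size=True each entry has shape (nchains, ndraws, ...).
print(ppc["y"].shape)  # (2, 1000, 100)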
Example #5
def svgd(vars=None, n=5000, n_particles=100, jitter=.01,
         optimizer=adagrad, start=None, progressbar=True,
         random_seed=None, model=None):

    if random_seed is not None:
        np.random.seed(random_seed)

    model = modelcontext(model)
    if vars is None:
        vars = model.vars
    vars = pm.inputvars(vars)

    if start is None:
        start = model.test_point
    start = model.dict_to_array(start)

    # Initialize particles
    x0 = np.tile(start, (n_particles, 1))
    x0 += np.random.normal(0, jitter, x0.shape)

    theta = theano.shared(x0)

    # Create theano svgd gradient expression and function
    logp_grad_vec = _make_vectorized_logp_grad(vars, model, theta)
    svgd_grad = -1 * _svgd_gradient(vars, model, theta, logp_grad_vec) # maximize

    svgd_updates = optimizer([svgd_grad], [theta], learning_rate=1e-3)

    i = tt.iscalar('i')
    svgd_step = theano.function([i], [i],
                                updates=svgd_updates)
    # Run svgd optimization
    if progressbar:
        progress = tqdm(np.arange(n))
    else:
        progress = np.arange(n)

    try:
        for ii in progress:
            svgd_step(ii)
    except KeyboardInterrupt:
        pass
    finally:
        if hasattr(progress, 'close'):
            progress.close()

    theta_val = theta.get_value()

    # Build trace

    strace = pm.backends.NDArray()
    try:
        strace.setup(theta_val.shape[0], 1)
        for p in theta_val:
            strace.record(model.bijection.rmap(p))
    except KeyboardInterrupt:
        pass
    finally:
        strace.close()

    trace = pm.backends.base.MultiTrace([strace])

    return trace
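A hedged usage sketch for the svgd routine above (toy data and model are illustrative):

import numpy as np
import pymc3 as pm

data = 1.0 + np.random.randn(50)

with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 10.0)
    sd = pm.HalfNormal("sd", 5.0)
    pm.Normal("obs", mu, sd, observed=data)
    trace = svgd(n=3000, n_particles=200, jitter=0.01, random_seed=42)

# The returned MultiTrace holds one "draw" per particle.
print(len(trace))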
Example #6
def find_MAP(start=None,
             vars=None,
             method="L-BFGS-B",
             return_raw=False,
             include_transformed=True,
             progressbar=True,
             maxeval=5000,
             model=None,
             *args,
             **kwargs):
    """
    Finds the local maximum a posteriori point given a model.

    find_MAP should not be used to initialize the NUTS sampler. Simply call pymc3.sample() and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.test_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless
        discrete variables are specified in `vars`, then
        `Powell` which will perform better).  For instructions on use of a callable,
        refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used find_MAP() to initialize the NUTS sampler,
    but this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside pymc3.sample() and you should thus avoid this method.
    """
    model = modelcontext(model)
    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)

    check_start_vals(start, model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)
    logp_func = bij.mapf(model.fastlogp_nojac)
    x0 = bij.map(start)

    try:
        dlogp_func = bij.mapf(model.fastdlogp_nojac(vars))
        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available "
            "(e.g. vars contains discrete variables). MAP "
            "estimates may not be accurate for the default "
            "parameters. Defaulting to non-gradient minimization "
            "'Powell'.")
        method = "Powell"

    if "fmin" in kwargs:
        fmin = kwargs.pop("fmin")
        warnings.warn(
            "In future versions, set the optimization algorithm with a string. "
            'For example, use `method="L-BFGS-B"` instead of '
            '`fmin=sp.optimize.fmin_l_bfgs_b`.')

        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        # Check to see if minimization function actually uses the gradient
        if "fprime" in getargspec(fmin).args:

            def grad_logp(point):
                return nan_to_num(-dlogp_func(point))

            opt_result = fmin(cost_func, x0, fprime=grad_logp, *args, **kwargs)
        else:
            # Check to see if minimization function uses a starting value
            if "x0" in getargspec(fmin).args:
                opt_result = fmin(cost_func, x0, *args, **kwargs)
            else:
                opt_result = fmin(cost_func, *args, **kwargs)

        if isinstance(opt_result, tuple):
            mx0 = opt_result[0]
        else:
            mx0 = opt_result
    else:
        # remove 'if' part, keep just this 'else' block after version change
        if compute_gradient:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                        dlogp_func)
        else:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        try:
            opt_result = minimize(cost_func,
                                  x0,
                                  method=method,
                                  jac=compute_gradient,
                                  *args,
                                  **kwargs)
            mx0 = opt_result["x"]  # r -> opt_result
        except (KeyboardInterrupt, StopIteration) as e:
            mx0, opt_result = cost_func.previous_x, None
            if isinstance(e, StopIteration):
                pm._log.info(e)
        finally:
            last_v = cost_func.n_eval
            if progressbar:
                assert isinstance(cost_func.progress, ProgressBar)
                cost_func.progress.total = last_v
                cost_func.progress.update(last_v)
                print()

    vars = get_default_varnames(model.unobserved_RVs, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars,
                              model.fastfn(vars)(bij.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
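A minimal usage sketch for find_MAP (toy model for illustration; note the warning above about not using MAP estimates to initialize NUTS):

import numpy as np
import pymc3 as pm

y = 2.0 + np.random.randn(20)

with pm.Model():
    mu = pm.Normal("mu", 0.0, 10.0)
    pm.Normal("obs", mu, 1.0, observed=y)
    map_estimate = find_MAP(method="L-BFGS-B")

print(map_estimate["mu"])  # close to the sample mean of y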
Example #7
    def __init__(self,
                 n0=10,
                 init_samples=None,
                 k_trunc=np.inf,
                 eps_z=.01,
                 nf_iter=2,
                 N=10,
                 t_ess=0.5,
                 beta_max=1,
                 model=None,
                 random_seed=-1,
                 chain=0,
                 frac_validate=0.0,
                 iteration=None,
                 alpha_w=(0, 0),
                 alpha_uw=(0, 0),
                 verbose=False,
                 n_component=None,
                 interp_nbin=None,
                 KDE=True,
                 bw_factor_min=1.0,
                 bw_factor_max=1.0,
                 bw_factor_num=1,
                 rel_bw=1,
                 edge_bins=None,
                 ndata_wT=None,
                 MSWD_max_iter=None,
                 NBfirstlayer=True,
                 logit=False,
                 Whiten=False,
                 trainable_qw=False,
                 sgd_steps=0,
                 knots_trainable=5,
                 batchsize=None,
                 nocuda=False,
                 patch=False,
                 shape=[28, 28, 1],
                 bounds=None):
        self.N = N
        self.n0 = n0

        self.model = model
        self.chain = chain

        # Init method params.
        self.init_samples = init_samples

        self.random_seed = random_seed

        # Seed NumPy and torch (random_seed == -1 means "do not seed").
        if self.random_seed != -1:
            np.random.seed(self.random_seed)
            torch.manual_seed(self.random_seed)

        # Separating out so I can keep track. These are SINF params.
        assert 0.0 <= frac_validate <= 1.0
        self.frac_validate = frac_validate
        self.iteration = iteration
        self.alpha_uw = alpha_uw
        self.alpha_w = alpha_w
        self.k_trunc = k_trunc
        self.verbose = verbose
        self.n_component = n_component
        self.interp_nbin = interp_nbin
        self.KDE = KDE
        self.bw_factors = np.linspace(bw_factor_min, bw_factor_max,
                                      bw_factor_num)
        self.edge_bins = edge_bins
        self.ndata_wT = ndata_wT
        self.MSWD_max_iter = MSWD_max_iter
        self.NBfirstlayer = NBfirstlayer
        self.logit = logit
        self.Whiten = Whiten
        self.batchsize = batchsize
        self.nocuda = nocuda
        self.patch = patch
        self.shape = shape

        # Convert bounds from [[x1min, x2min, ...], [x1max, x2max, ...]] to what
        # SINF wants: [[x1min, x1max], [x2min, x2max], ...].
        if bounds is not None:
            bounds_sinf = [list(b) for b in bounds.T]
        else:
            # Get the dimensionality from the initial samples, assuming (N, d) shape.
            bounds_sinf = [[None, None] for _ in range(init_samples.shape[1])]
        self.bounds = bounds_sinf

        # Trainable SINF parameters.
        self.trainable_qw = trainable_qw
        self.sgd_steps = sgd_steps
        self.knots_trainable = knots_trainable

        # NFO parameters.
        self.t_ess = t_ess
        self.beta_max = beta_max
        self.beta = 0  # initial value of beta before iterating, to match SMC
        self.rel_bw = rel_bw

        self.model = modelcontext(model)
        self.variables = inputvars(self.model.vars)
Example #8
    def __init__(
        self,
        vars=None,
        batch_size=None,
        total_size=None,
        step_size=1.0,
        model=None,
        random_seed=None,
        minibatches=None,
        minibatch_tensors=None,
        **kwargs
    ):
        warnings.warn(EXPERIMENTAL_WARNING)

        model = modelcontext(model)

        if vars is None:
            vars = model.vars

        vars = inputvars(vars)

        self.model = model
        self.vars = vars
        self.batch_size = batch_size
        self.total_size = total_size
        _value_error(
            total_size is not None and batch_size is not None,
            "total_size and batch_size of training data have to be specified",
        )
        self.expected_iter = int(total_size / batch_size)

        # set random stream
        self.random = None
        if random_seed is None:
            self.random = at_rng()
        else:
            self.random = at_rng(random_seed)

        self.step_size = step_size

        shared = make_shared_replacements(vars, model)

        self.updates = OrderedDict()
        self.q_size = int(sum(v.dsize for v in self.vars))

        flat_view = model.flatten(vars)
        self.inarray = [flat_view.input]

        self.dlog_prior = prior_dlogp(vars, model, flat_view)
        self.dlogp_elemwise = elemwise_dlogL(vars, model, flat_view)

        if minibatch_tensors is not None:
            _check_minibatches(minibatch_tensors, minibatches)
            self.minibatches = minibatches

            # Replace input shared variables with tensors
            def is_shared(t):
                return isinstance(t, aesara.compile.sharedvalue.SharedVariable)

            tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
            updates = OrderedDict(
                {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)}
            )
            self.minibatch_tensors = tensors
            self.inarray += self.minibatch_tensors
            self.updates.update(updates)

        self._initialize_values()
        super().__init__(vars, shared)
Example #9
def get_dense_nuts_step(
    start=None,
    adaptation_window=101,
    doubling=True,
    initial_weight=10,
    use_hessian=False,
    use_hessian_diag=False,
    hessian_regularization=1e-8,
    model=None,
    **kwargs,
):
    """Get a NUTS step function with a dense mass matrix

    The entries in the mass matrix will be tuned based on the sample
    covariances during tuning. All extra arguments are passed directly to
    ``pymc3.NUTS``.

    Args:
        start (dict, optional): A starting point in parameter space. If not
            provided, the model's ``test_point`` is used.
        adaptation_window (int, optional): The (initial) size of the window
            used for sample covariance estimation.
        doubling (bool, optional): If ``True`` (default) the adaptation window
            is doubled each time the matrix is updated.

    """
    model = modelcontext(model)

    if not all_continuous(model.vars):
        raise ValueError("NUTS can only be used for models with only "
                         "continuous variables.")

    if start is None:
        start = model.test_point
    mean = model.dict_to_array(start)

    if use_hessian or use_hessian_diag:
        try:
            import numdifftools as nd
        except ImportError:
            raise ImportError(
                "The 'numdifftools' package is required for Hessian "
                "computations")

        logger.info("Numerically estimating Hessian matrix")
        if use_hessian_diag:
            hess = nd.Hessdiag(model.logp_array)(mean)
            var = np.diag(-1.0 / hess)
        else:
            hess = nd.Hessian(model.logp_array)(mean)
            var = -np.linalg.inv(hess)

        factor = 1
        success = False
        while not success:
            var[np.diag_indices_from(var)] += factor * hessian_regularization

            try:
                np.linalg.cholesky(var)
            except np.linalg.LinAlgError:
                factor *= 2
            else:
                success = True

    else:
        var = np.eye(len(mean))

    potential = QuadPotentialDenseAdapt(
        model.ndim,
        mean,
        var,
        initial_weight,
        adaptation_window=adaptation_window,
        doubling=doubling,
    )

    return pm.NUTS(potential=potential, model=model, **kwargs)
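A usage sketch, assuming get_dense_nuts_step is importable in the sampling script (it resembles the dense-mass-matrix helper from the exoplanet/pymc3-ext packages):

import pymc3 as pm

with pm.Model() as model:
    # Correlated parameters benefit most from a dense mass matrix.
    x = pm.Normal("x", 0.0, 1.0, shape=3)
    y = pm.Normal("y", x.sum(), 0.1)
    step = get_dense_nuts_step(adaptation_window=101, doubling=True)
    trace = pm.sample(1000, tune=1000, step=step)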
Example #10
    def __init__(self,
                 vars=None,
                 scaling=None,
                 step_scale=0.25,
                 is_cov=False,
                 model=None,
                 blocked=True,
                 use_single_leapfrog=False,
                 potential=None,
                 integrator="leapfrog",
                 **theano_kwargs):
        """Superclass to implement Hamiltonian/hybrid monte carlo

        Parameters
        ----------
        vars : list of theano variables
        scaling : array_like, ndim = {1,2}
            Scaling for momentum distribution. 1d arrays interpreted matrix diagonal.
        step_scale : float, default=0.25
            Size of steps to take, automatically scaled down by 1/n**(1/4)
        is_cov : bool, default=False
            Treat scaling as a covariance matrix/vector if True, else treat it as a
            precision matrix/vector
        model : pymc3 Model instance, default=Context model
        blocked : bool, default=True
        use_single_leapfrog : bool, default=False
            If True, leapfrog steps take a single step at a time.
        potential : Potential, optional
            An object that represents the Hamiltonian, with `velocity`,
            `energy`, and `random` methods.
        **theano_kwargs: passed to theano functions
        """
        model = modelcontext(model)

        if vars is None:
            vars = model.cont_vars
        vars = inputvars(vars)

        if scaling is None and potential is None:
            scaling = model.test_point

        if isinstance(scaling, dict):
            scaling = guess_scaling(Point(scaling, model=model),
                                    model=model,
                                    vars=vars)

        if scaling is not None and potential is not None:
            raise ValueError("Can not specify both potential and scaling.")

        self.step_size = step_scale / (model.ndim**0.25)
        if potential is not None:
            self.potential = potential
        else:
            self.potential = quad_potential(scaling, is_cov, as_cov=False)

        shared = make_shared_replacements(vars, model)
        if theano_kwargs is None:
            theano_kwargs = {}

        self.H, self.compute_energy, self.leapfrog, self.dlogp = get_theano_hamiltonian_functions(
            vars, shared, model.logpt, self.potential, use_single_leapfrog,
            integrator, **theano_kwargs)

        super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
Example #11
def loo(trace, model=None, reff=None, progressbar=False):
    """Calculates leave-one-out (LOO) cross-validation for out of sample
    predictive model fit, following Vehtari et al. (2015). Cross-validation is
    computed using Pareto-smoothed importance sampling (PSIS).

    Parameters
    ----------
    trace : result of MCMC run
    model : PyMC Model
        Optional model. Default None, taken from context.
    reff : float
        relative MCMC efficiency, `effective_n / N` i.e. number of effective
        samples divided by the number of actual samples. Computed from trace by
        default.
    progressbar: bool
        Whether or not to display a progress bar in the command line. The
        bar shows the percentage of completion, the evaluation speed, and
        the estimated time to completion

    Returns
    -------
    df_loo: pandas.DataFrame 
        Estimation and standard error of `elpd_loo`, `p_loo`, and `looic`
    pointwise: dict
        point-wise value of `elpd_loo`, `p_loo`, `looic` and pareto shape `k`
    """
    model = modelcontext(model)

    if reff is None:
        if trace.nchains == 1:
            reff = 1.
        else:
            eff = effective_n(trace)
            eff_ave = pmstat.dict2pd(eff, 'eff').mean()
            samples = len(trace) * trace.nchains
            reff = eff_ave / samples

    log_py = pmstat._log_post_trace(trace, model, progressbar=progressbar)
    if log_py.size == 0:
        raise ValueError('The model does not contain observed values.')

    shape_str = ' by '.join(map(str, log_py.shape))
    print('Computed from ' + shape_str + ' log-likelihood matrix')

    lw, ks = pmstat._psislw(-log_py, reff)
    lw += log_py

    elpd_loo_i = logsumexp(lw, axis=0)
    elpd_loo = elpd_loo_i.sum()
    elpd_loo_se = (len(elpd_loo_i) * np.var(elpd_loo_i)) ** 0.5

    loo_lppd_i = - 2 * elpd_loo_i
    loo_lppd = loo_lppd_i.sum()
    loo_lppd_se = (len(loo_lppd_i) * np.var(loo_lppd_i)) ** 0.5

    lppd_i = logsumexp(log_py, axis=0, b=1. / log_py.shape[0])
    p_loo_i = lppd_i - elpd_loo_i
    p_loo = p_loo_i.sum()
    p_loo_se = (len(p_loo_i) * np.var(p_loo_i)) ** 0.5

    df_loo = (pd.DataFrame(dict(Estimate=[elpd_loo, p_loo, loo_lppd],
                                SE=[elpd_loo_se, p_loo_se, loo_lppd_se]))
                .rename(index={0: 'elpd_loo', 
                               1: 'p_loo', 
                               2: 'looic'}))
    pointwise = dict(elpd_loo=elpd_loo_i,
                        p_loo=p_loo_i,
                        looic=loo_lppd_i, 
                        ks=ks)
    return df_loo, pointwise
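A minimal usage sketch for the loo helper above (toy model for illustration):

import numpy as np
import pymc3 as pm

y = np.random.randn(50)

with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("obs", mu, 1.0, observed=y)
    trace = pm.sample(1000, tune=1000, chains=2)

df_loo, pointwise = loo(trace, model=model)
print(df_loo)            # elpd_loo, p_loo, looic with standard errors
print(pointwise["ks"])   # per-observation Pareto shape diagnostics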
Example #12
    def __init__(self,
                 vars=None,
                 scaling=None,
                 step_scale=0.25,
                 is_cov=False,
                 model=None,
                 blocked=True,
                 potential=None,
                 integrator="leapfrog",
                 dtype=None,
                 **theano_kwargs):
        """Set up Hamiltonian samplers with common structures.

        Parameters
        ----------
        vars : list of theano variables
        scaling : array_like, ndim = {1,2}
            Scaling for momentum distribution. 1d arrays interpreted matrix
            diagonal.
        step_scale : float, default=0.25
            Size of steps to take, automatically scaled down by 1/n**(1/4)
        is_cov : bool, default=False
            Treat scaling as a covariance matrix/vector if True, else treat
            it as a precision matrix/vector
        model : pymc3 Model instance
        blocked: bool, default=True
        potential : Potential, optional
            An object that represents the Hamiltonian, with `velocity`,
            `energy`, and `random` methods.
        **theano_kwargs: passed to theano functions
        """
        model = modelcontext(model)

        if vars is None:
            vars = model.cont_vars
        vars = inputvars(vars)

        super(BaseHMC, self).__init__(vars,
                                      blocked=blocked,
                                      model=model,
                                      dtype=dtype,
                                      **theano_kwargs)

        size = self._logp_dlogp_func.size

        if scaling is None and potential is None:
            mean = floatX(np.zeros(size))
            var = floatX(np.ones(size))
            potential = QuadPotentialDiagAdapt(size, mean, var, 10)

        if isinstance(scaling, dict):
            point = Point(scaling, model=model)
            scaling = guess_scaling(point, model=model, vars=vars)

        if scaling is not None and potential is not None:
            raise ValueError("Can not specify both potential and scaling.")

        self.step_size = step_scale / (size**0.25)
        if potential is not None:
            self.potential = potential
        else:
            self.potential = quad_potential(scaling, is_cov)

        self.integrator = integration.CpuLeapfrogIntegrator(
            size, self.potential, self._logp_dlogp_func)
Example #13
def sample_nf_smc(
    draws=2000,
    start=None,
    threshold=0.5,
    frac_validate=0.1,
    iteration=5,
    alpha=(0, 0),
    k_trunc=0.25,
    pareto=False,
    epsilon=1e-3,
    local_thresh=3,
    local_step_size=0.1,
    local_grad=True,
    nf_local_iter=0,
    max_line_search=2,
    verbose=False,
    n_component=None,
    interp_nbin=None,
    KDE=True,
    bw_factor=0.5,
    edge_bins=None,
    ndata_wT=None,
    MSWD_max_iter=None,
    NBfirstlayer=True,
    logit=False,
    Whiten=False,
    batchsize=None,
    nocuda=False,
    patch=False,
    shape=[28, 28, 1],
    model=None,
    random_seed=-1,
    parallel=False,
    chains=None,
    cores=None,
):
    r"""
    Sequential Monte Carlo based sampling.

    Parameters
    ----------
    draws: int
        The number of samples to draw from the posterior (i.e. last stage), and also the number of
        independent chains. Defaults to 2000.
    start: dict, or array of dict
        Starting point in parameter space. It should be a list of dict with length `chains`.
        When None (default) the starting point is sampled from the prior distribution.
    threshold: float
        Determines the change of beta from stage to stage, i.e. indirectly the number of stages;
        the higher the value of `threshold`, the higher the number of stages. Defaults to 0.5.
        It should be between 0 and 1.
    model: Model (optional if in ``with`` context)).
    random_seed: int
        random seed
    parallel: bool
        Distribute computations across cores if the number of cores is larger than 1.
        Defaults to False.
    cores : int
        The number of chains to run in parallel. If ``None``, set to the number of CPUs in the
        system, but at most 4.
    chains : int
        The number of chains to sample. Running independent chains is important for some
        convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever
        is larger.

    Notes
    -----
    SMC works by moving through successive stages. At each stage the inverse temperature
    :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0
    we have the prior distribution and when :math:`\beta` =1 we have the posterior distribution.
    So in more general terms we are always computing samples from a tempered posterior that we can
    write as:

    .. math::

        p(\theta \mid y)_{\beta} = p(y \mid \theta)^{\beta} p(\theta)

    A summary of the algorithm is:

     1. Initialize :math:`\beta` at zero and stage at zero.
     2. Generate N samples :math:`S_{\beta}` from the prior (because when :math:`\beta = 0` the
        tempered posterior is the prior).
     3. Increase :math:`\beta` in order to make the effective sample size equal some predefined
        value (we use :math:`Nt`, where :math:`t` is 0.5 by default).
     4. Compute a set of N importance weights W. The weights are computed as the ratio of the
        likelihoods of a sample at stage i+1 and stage i.
     5. Obtain :math:`S_{w}` by re-sampling according to W.
     6. Use W to compute the mean and covariance for the proposal distribution, a MVNormal.
     7. For stages other than 0 use the acceptance rate from the previous stage to estimate
        `n_steps`.
     8. Run N independent Metropolis-Hastings (IMH) chains (each one of length `n_steps`),
        starting each one from a different sample in :math:`S_{w}`. The samples are IMH because the
        proposal mean is that of the previous posterior stage and not the current point in
        parameter space.
     9. Repeat from step 3 until :math:`\beta \ge 1`.
     10. The final result is a collection of N samples from the posterior.


    References
    ----------
    .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013),
        Bayesian inversion for finite fault earthquake source models I- Theory and algorithm.
        Geophysical Journal International, 2013, 194(3), pp.1701-1726,
        `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__

    .. [Ching2007] Ching, J. and Chen, Y. (2007).
        Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class
        Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816),
        816-832. `link <http://ascelibrary.org/doi/abs/10.1061/%28ASCE%290733-9399
        %282007%29133:7%28816%29>`__
    """
    _log = logging.getLogger("pymc3")
    _log.info("Initializing SMC+SINF sampler...")

    model = modelcontext(model)
    if model.name:
        raise NotImplementedError(
            "The SMC implementation currently does not support named models. "
            "See https://github.com/pymc-devs/pymc3/pull/4365.")
    if cores is None:
        cores = _cpu_count()

    if chains is None:
        chains = max(2, cores)
    elif chains == 1:
        cores = 1

    _log.info(f"Sampling {chains} chain{'s' if chains > 1 else ''} "
              f"in {cores} job{'s' if cores > 1 else ''}")

    if random_seed == -1:
        random_seed = None
    if chains == 1 and isinstance(random_seed, int):
        random_seed = [random_seed]
    if random_seed is None or isinstance(random_seed, int):
        if random_seed is not None:
            np.random.seed(random_seed)
        random_seed = [np.random.randint(2**30) for _ in range(chains)]
    if not isinstance(random_seed, Iterable):
        raise TypeError(
            "Invalid value for `random_seed`. Must be tuple, list or int")

    params = (
        draws,
        start,
        threshold,
        frac_validate,
        iteration,
        alpha,
        k_trunc,
        pareto,
        epsilon,
        local_thresh,
        local_step_size,
        local_grad,
        nf_local_iter,
        max_line_search,
        verbose,
        n_component,
        interp_nbin,
        KDE,
        bw_factor,
        edge_bins,
        ndata_wT,
        MSWD_max_iter,
        NBfirstlayer,
        logit,
        Whiten,
        batchsize,
        nocuda,
        patch,
        shape,
        model,
    )

    t1 = time.time()
    if parallel and chains > 1:
        loggers = [_log] + [None] * (chains - 1)
        pool = mp.Pool(cores)
        results = pool.starmap(sample_nf_smc_int,
                               [(*params, random_seed[i], i, loggers[i])
                                for i in range(chains)])

        pool.close()
        pool.join()
    else:
        results = []
        for i in range(chains):
            results.append(sample_nf_smc_int(*params, random_seed[i], i, _log))

    (
        traces,
        log_marginal_likelihood,
        q_samples,
        q_log_weights,
        betas,
    ) = zip(*results)
    trace = MultiTrace(traces)
    trace.report._n_draws = draws
    trace.report.log_marginal_likelihood = log_marginal_likelihood
    trace.report.q_samples = q_samples
    trace.report.q_log_weights = q_log_weights
    trace.report.betas = betas
    trace.report._t_sampling = time.time() - t1

    return trace
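A hedged usage sketch for the SMC+SINF sampler above (toy model; the SINF-specific keyword arguments keep their defaults):

import pymc3 as pm

with pm.Model() as model:
    x = pm.Normal("x", 0.0, 1.0, shape=2)
    trace = sample_nf_smc(draws=1000, threshold=0.5, chains=2, random_seed=42)

print(trace.report.log_marginal_likelihood)
print(trace.report.betas)  # inverse-temperature schedule per chain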
Example #14
def sample_nfmc(draws=500,
                init_draws=500,
                resampling_draws=500,
                init_ess=100,
                init_method='prior',
                init_samples=None,
                start=None,
                sample_mode='reinit',
                finish_regularized=False,
                cull_lowp_tol=0.05,
                init_EL2O='adam',
                mean_field_EL2O=False,
                use_hess_EL2O=False,
                absEL2O=1e-10,
                fracEL2O=1e-2,
                EL2O_draws=100,
                maxiter_EL2O=500,
                EL2O_optim_method='L-BFGS-B',
                scipy_map_method='L-BFGS-B',
                adam_lr=1e-3,
                adam_b1=0.9,
                adam_b2=0.999,
                adam_eps=1.0e-8,
                adam_steps=1000,
                simulator=None,
                model_data=None,
                sim_data_cov=None,
                sim_size=None,
                sim_params=None,
                sim_start=None,
                sim_optim_method='lbfgs',
                sim_tol=0.01,
                local_thresh=3,
                local_step_size=0.1,
                local_grad=True,
                init_local=True,
                full_local=False,
                nf_local_iter=3,
                max_line_search=100,
                k_trunc=0.25,
                norm_tol=0.01,
                ess_tol=0.5,
                optim_iter=1000,
                ftol=2.220446049250313e-9,
                gtol=1.0e-5,
                nf_iter=3,
                model=None,
                frac_validate=0.1,
                iteration=None,
                final_iteration=None,
                alpha=(0, 0),
                final_alpha=(0.75, 0.75),
                verbose=False,
                n_component=None,
                interp_nbin=None,
                KDE=True,
                bw_factor_min=0.5,
                bw_factor_max=2.5,
                bw_factor_num=11,
                edge_bins=None,
                ndata_wT=None,
                MSWD_max_iter=None,
                NBfirstlayer=True,
                logit=False,
                Whiten=False,
                batchsize=None,
                nocuda=False,
                patch=False,
                shape=[28, 28, 1],
                redraw=True,
                random_seed=-1,
                parallel=False,
                chains=None,
                cores=None):
    r"""
    Normalizing flow based nested sampling.

    Parameters
    ----------
    draws: int
        The number of samples to draw from the posterior (i.e. last stage), and also the number of
        independent chains. Defaults to 500.
    start: dict, or array of dict
        Starting point in parameter space. It should be a list of dict with length `chains`.
        When None (default) the starting point is sampled from the prior distribution.
    init_method: str
        Tells us how to initialize the NFMC fits. Default is 'prior'. If this is supplied along with init_samples
        we use those instead. Current options are 'prior', 'full_rank', 'lbfgs'.
    norm_tol: float
        Fractional difference in the evidence estimate between two steps. If it falls below this we
        stop iterating over the NF fits.
    optim_iter: int
        Maximum number of optimization steps to run during the initialization.
    nf_iter: int
        Number of NF fit iterations to go through after the optimization step.
    model: Model (optional if in ``with`` context)).
    frac_validate: float
        Fraction of the live points at each NS iteration that we use for validation of the NF fit.
    alpha: tuple of floats
        Regularization parameters used for the NF fit.
    verbose: boolean
        Whether you want verbose output from the NF fit.
    random_seed: int
        random seed
    parallel: bool
        Distribute computations across cores if the number of cores is larger than 1.
        Defaults to False.
    cores : int
        Number of cores available for the optimization step. Defaults to None, in which case the CPU
        count is used.
    chains : int
        The number of chains to sample. Running independent chains is important for some
        convergence statistics. Default is 2.

    """

    _log = logging.getLogger("pymc3")
    _log.info("Initializing normalizing flow based sampling...")

    model = modelcontext(model)
    if model.name:
        raise NotImplementedError(
            "The NS_NFMC implementation currently does not support named models. "
            "See https://github.com/pymc-devs/pymc3/pull/4365.")
    if cores is None:
        cores = _cpu_count()
    # Match the documented default of 2 chains when none is given.
    if chains is None:
        chains = 2

    _log.info(f"Sampling {chains} chain{'s' if chains > 1 else ''}. "
              f"Cores available for optimization: {cores}")

    if random_seed == -1:
        random_seed = None
    if chains == 1 and isinstance(random_seed, int):
        random_seed = [random_seed]
    if random_seed is None or isinstance(random_seed, int):
        if random_seed is not None:
            np.random.seed(random_seed)
        random_seed = [np.random.randint(2**30) for _ in range(chains)]
    if not isinstance(random_seed, Iterable):
        raise TypeError(
            "Invalid value for `random_seed`. Must be tuple, list or int")

    assert (sample_mode == 'reinit' or sample_mode == 'keep_local'
            or sample_mode == 'function_approx')

    params = (
        draws,
        init_draws,
        resampling_draws,
        init_ess,
        init_method,
        init_samples,
        start,
        sample_mode,
        finish_regularized,
        cull_lowp_tol,
        init_EL2O,
        mean_field_EL2O,
        use_hess_EL2O,
        absEL2O,
        fracEL2O,
        EL2O_draws,
        maxiter_EL2O,
        EL2O_optim_method,
        scipy_map_method,
        adam_lr,
        adam_b1,
        adam_b2,
        adam_eps,
        adam_steps,
        simulator,
        model_data,
        sim_data_cov,
        sim_size,
        sim_params,
        sim_start,
        sim_optim_method,
        sim_tol,
        local_thresh,
        local_step_size,
        local_grad,
        init_local,
        full_local,
        nf_local_iter,
        max_line_search,
        k_trunc,
        norm_tol,
        ess_tol,
        optim_iter,
        ftol,
        gtol,
        nf_iter,
        model,
        frac_validate,
        iteration,
        final_iteration,
        alpha,
        final_alpha,
        cores,
        verbose,
        n_component,
        interp_nbin,
        KDE,
        bw_factor_min,
        bw_factor_max,
        bw_factor_num,
        edge_bins,
        ndata_wT,
        MSWD_max_iter,
        NBfirstlayer,
        logit,
        Whiten,
        batchsize,
        nocuda,
        patch,
        shape,
        redraw,
        parallel,
    )

    t1 = time.time()

    results = []
    for i in range(chains):
        results.append(sample_nfmc_int(*params, random_seed[i], i, _log))
    (traces, log_evidence, q_samples, importance_weights, total_samples,
     total_weights, logp, logq, train_logp, train_logq, logZ, q_models, q_ess,
     train_ess, total_ess, min_var_bws, min_pq_bws) = zip(*results)
    trace = MultiTrace(traces)
    trace.report.log_evidence = log_evidence
    trace.report.q_samples = q_samples
    trace.report.importance_weights = importance_weights
    trace.report.total_samples = total_samples
    trace.report.total_weights = total_weights
    trace.report.logp = logp
    trace.report.logq = logq
    trace.report.train_logp = train_logp
    trace.report.train_logq = train_logq
    trace.report.logZ = logZ
    trace.report.q_models = q_models
    trace.report.q_ess = q_ess
    trace.report.train_ess = train_ess
    trace.report.total_ess = total_ess
    trace.report._n_draws = draws
    trace.report.min_var_bws = min_var_bws
    trace.report.min_pq_bws = min_pq_bws
    trace.report._t_sampling = time.time() - t1

    return trace
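A hedged usage sketch for sample_nfmc (toy model; `chains` is passed explicitly, matching the documented default of 2):

import pymc3 as pm

with pm.Model() as model:
    x = pm.Normal("x", 0.0, 1.0, shape=2)
    trace = sample_nfmc(draws=500, init_draws=500, init_method='prior',
                        chains=2, random_seed=42)

print(trace.report.log_evidence)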
Example #15
    def __init__(
        self,
        draws=2000,
        start=None,
        threshold=0.5,
        model=None,
        random_seed=-1,
        chain=0,
        frac_validate=0.1,
        iteration=None,
        alpha=(0, 0),
        k_trunc=0.5,
        pareto=False,
        epsilon=1e-3,
        local_thresh=3,
        local_step_size=0.1,
        local_grad=True,
        nf_local_iter=0,
        max_line_search=2,
        verbose=False,
        n_component=None,
        interp_nbin=None,
        KDE=True,
        bw_factor=0.5,
        edge_bins=None,
        ndata_wT=None,
        MSWD_max_iter=None,
        NBfirstlayer=True,
        logit=False,
        Whiten=False,
        batchsize=None,
        nocuda=False,
        patch=False,
        shape=[28, 28, 1],
    ):

        self.draws = draws
        self.start = start
        self.threshold = threshold
        self.model = model
        self.random_seed = random_seed
        self.chain = chain
        self.frac_validate = frac_validate
        self.iteration = iteration
        self.alpha = alpha
        self.k_trunc = k_trunc
        self.pareto = pareto
        self.epsilon = epsilon

        self.local_thresh = local_thresh
        self.local_step_size = local_step_size
        self.local_grad = local_grad
        self.nf_local_iter = nf_local_iter
        self.max_line_search = max_line_search

        self.verbose = verbose
        self.n_component = n_component
        self.interp_nbin = interp_nbin
        self.KDE = KDE
        self.bw_factor = bw_factor
        self.edge_bins = edge_bins
        self.ndata_wT = ndata_wT
        self.MSWD_max_iter = MSWD_max_iter
        self.NBfirstlayer = NBfirstlayer
        self.logit = logit
        self.Whiten = Whiten
        self.batchsize = batchsize
        self.nocuda = nocuda
        self.patch = patch
        self.shape = shape

        self.model = modelcontext(model)

        if self.random_seed != -1:
            np.random.seed(self.random_seed)

        self.beta = 0
        self.variables = inputvars(self.model.vars)
        self.weights = np.ones(self.draws) / self.draws
        #self.sinf_logq = np.array([])
        self.log_marginal_likelihood = 0
Example #16
def Marginal_llk(mtrace,
                 model=None,
                 ADVI=False,
                 trace2=None,
                 logp=None,
                 maxiter=1000,
                 burn_in=1000):
    """The Bridge Sampling Estimator of the Marginal Likelihood.
    Parameters
    ----------
    mtrace : MultiTrace, result of MCMC run
    model : PyMC Model Optional model. Default None, taken from context.
    logp : Model Log-probability function, read from the model by default
    maxiter : Maximum number of iterations
    Returns
    -------
    marg_llk : Estimated Marginal log-Likelihood.
    """
    r0, tol1, tol2 = 0.5, 1e-2, 1e-2

    model = modelcontext(model)
    if logp is None:
        logp = model.logp_array
    vars = model.free_RVs

    len_trace = len(mtrace)

    if not ADVI:
        nchain = mtrace.nchains
        N1_ = len_trace // 2
        N1 = N1_ * nchain
        N2 = len_trace * nchain - N1
    else:
        nchain = 2
        N1_ = len_trace
        N1 = N1_
        N2 = len_trace

    arraysz = model.bijection.ordering.size
    samples_4_fit = np.zeros((arraysz, N1))
    samples_4_iter = np.zeros((arraysz, N2))
    neff_list = dict()
    for var in vars:
        varmap = model.bijection.ordering.by_name[var.name]
        if ADVI:
            x = mtrace[0:N1_][var.name]
            samples_4_fit[varmap.slc, :] = x
        else:
            x = mtrace[0:N1_][var.name]
            samples_4_fit[varmap.slc, :] = x.reshape(
                (x.shape[0], np.prod(x.shape[1:], dtype=int))).T

        if ADVI:
            x2 = trace2[0:][var.name]
            samples_4_iter[varmap.slc, :] = x2
            neff_list.update(pm.effective_n(trace2[0:], varnames=[var.name]))

        else:
            x2 = mtrace[N1_:][var.name]
            samples_4_iter[varmap.slc, :] = x2.reshape(
                (x2.shape[0], np.prod(x2.shape[1:], dtype=int))).T
            neff_list.update(pm.effective_n(mtrace[N1_:], varnames=[var.name]))

    neff = pm.stats.dict2pd(neff_list, 'temp').median()
    m = np.mean(samples_4_fit, axis=1)
    V = np.cov(samples_4_fit)

    if np.all(np.linalg.eigvals(V) > 0):
        L = chol(V, lower=True)
    else:
        print('SDP converting')
        V = sdp.nearPD(V)
        L = chol(V, lower=True)

    print('m: ', np.sum(np.isinf(m[:, None])))

    gen_samples = m[:, None] + dot(
        L, st.norm.rvs(0, 1, size=samples_4_iter.shape))
    print('gen_samples: ', np.sum(np.isinf(gen_samples)))
    #gen_samples[gen_samples == inf] = 0
    # Evaluate proposal distribution for posterior & generated samples
    q12 = st.multivariate_normal.logpdf(samples_4_iter.T, m, V)
    q22 = st.multivariate_normal.logpdf(gen_samples.T, m, V)
    print('q12: ', np.sum(np.isinf(q12)))
    print('q22: ', np.sum(np.isinf(q22)))

    # Evaluate unnormalized posterior for posterior & generated samples
    q11 = np.asarray([logp(point) for point in samples_4_iter.T])
    q21 = np.asarray([logp(point) for point in gen_samples.T])

    q21[np.isneginf(q21)] = -100000
    q11[np.isneginf(q11)] = -100000

    def iterative_scheme(q11, q12, q21, q22, r0, neff, tol, maxiter,
                         criterion):
        l1 = q11 - q12
        l2 = q21 - q22
        lstar = np.median(l1)  # To increase numerical stability,
        # subtracting the median of l1 from l1 & l2 later

        print('neff: ', neff)
        s1 = neff / (neff + N2)
        s2 = N2 / (neff + N2)
        r = r0
        r_vals = [r]
        logml = np.log(r) + lstar
        criterion_val = 1 + tol
        i = 0
        while (i <= maxiter) & (criterion_val > tol):
            print('i: ', i)
            print('maxiter', maxiter)
            print('criterionval: ', criterion_val)
            print('tol: ', tol)
            rold = r
            logmlold = logml

            numi = np.exp(l2 - lstar) / (s1 * np.exp(l2 - lstar) + s2 * r)
            print('l2: ', l2)
            print('lstar: ', lstar)
            print('s1: ', s1)
            print('r :', r)
            print('Num: ', numi)
            deni = 1 / (s1 * np.exp(l1 - lstar) + s2 * r)
            print('Den: ', deni)

            if np.sum(~np.isfinite(numi)) + np.sum(~np.isfinite(deni)) > 0:
                warn("""Infinite value in iterative scheme, returning NaN.
                Try rerunning with more samples.""")
            r = (N1 / N2) * np.sum(numi) / np.sum(deni)
            print('r: ', r)
            r_vals.append(r)
            logml = np.log(r) + lstar
            print('Logml: ', logml)
            i += 1
            if criterion == 'r':
                criterion_val = np.abs((r - rold) / r)
            elif criterion == 'logml':
                criterion_val = np.abs((logml - logmlold) / logml)
            print('criterion val: ', criterion_val)

        if i >= maxiter:
            return dict(logml=np.NaN, niter=i, r_vals=np.asarray(r_vals))
        else:
            return dict(logml=logml, niter=i)

    tmp = iterative_scheme(q11, q12, q21, q22, r0, neff, tol1, maxiter, 'r')
    if ~np.isfinite(tmp['logml']):
        warn("""logml could not be estimated within maxiter, rerunning with
                      adjusted starting value. Estimate might be more variable than usual."""
             )
        # use geometric mean as starting value
        r0_2 = np.sqrt(tmp['r_vals'][-2] * tmp['r_vals'][-1])

        tmp = iterative_scheme(q11, q12, q21, q22, r0_2, neff, tol2, maxiter,
                               'r')

    return dict(logml=tmp['logml'],
                niter=tmp['niter'],
                method="normal",
                q11=q11,
                q12=q12,
                q21=q21,
                q22=q22)
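
# Hedged usage sketch (not part of the original listing): it assumes this
# Marginal_llk function and PyMC3 are importable, and uses an illustrative toy
# model; only the 'logml' entry of the returned dict is printed.
import numpy as np
import pymc3 as pm

with pm.Model() as toy_model:
    mu = pm.Normal('mu', mu=0.0, sigma=1.0)
    pm.Normal('y', mu=mu, sigma=1.0, observed=np.random.randn(50))
    toy_trace = pm.sample(2000, tune=1000, chains=2)

bridge = Marginal_llk(toy_trace, model=toy_model, maxiter=1000)
print('estimated marginal log-likelihood:', bridge['logml'])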
Example No. 17
0
def sample_smc(
    draws=2000,
    kernel="metropolis",
    n_steps=25,
    start=None,
    tune_steps=True,
    p_acc_rate=0.85,
    threshold=0.5,
    save_sim_data=False,
    save_log_pseudolikelihood=True,
    model=None,
    random_seed=-1,
    parallel=False,
    chains=None,
    cores=None,
):
    r"""
    Sequential Monte Carlo based sampling.

    Parameters
    ----------
    draws: int
        The number of samples to draw from the posterior (i.e. the last stage), which is also
        the number of independent chains. Defaults to 2000.
    kernel: str
        Kernel method for the SMC sampler. Available options are ``metropolis`` (default) and
        ``ABC``. Use ``ABC`` for likelihood-free inference together with a ``pm.Simulator``.
    n_steps: int
        The number of steps of each Markov Chain. If ``tune_steps == True``, ``n_steps`` will be
        used for the first stage and for the others it will be determined automatically based on
        the acceptance rate and ``p_acc_rate``; the max number of steps is ``n_steps``.
    start: dict, or array of dict
        Starting point in parameter space. It should be a list of dict with length `chains`.
        When None (default) the starting point is sampled from the prior distribution.
    tune_steps: bool
        Whether to compute the number of steps automatically or not. Defaults to True.
    p_acc_rate: float
        Used to compute ``n_steps`` when ``tune_steps == True``. The higher the value of
        ``p_acc_rate`` the higher the number of steps computed automatically. Defaults to 0.85.
        It should be between 0 and 1.
    threshold: float
        Determines the change of beta from stage to stage, i.e. indirectly the number of stages;
        the higher the value of ``threshold`` the higher the number of stages. Defaults to 0.5.
        It should be between 0 and 1.
    save_sim_data : bool
        Whether or not to save the simulated data. This parameter only works with the ABC kernel.
        The stored data corresponds to samples from the posterior predictive distribution.
    save_log_pseudolikelihood : bool
        Whether or not to save the log pseudolikelihood values. This parameter only works with the
        ABC kernel. The stored data can be used to compute LOO or WAIC values. Computing LOO/WAIC
        values from log pseudolikelihood values is experimental.
    model: Model (optional if in ``with`` context).
    random_seed: int
        random seed
    parallel: bool
        Distribute computations across cores if the number of cores is larger than 1.
        Defaults to False.
    cores : int
        The number of chains to run in parallel. If ``None``, set to the number of CPUs in the
        system, but at most 4.
    chains : int
        The number of chains to sample. Running independent chains is important for some
        convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever
        is larger.

    Notes
    -----
    SMC works by moving through successive stages. At each stage the inverse temperature
    :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta = 0`
    we have the prior distribution and when :math:`\beta = 1` we have the posterior distribution.
    So in more general terms we are always computing samples from a tempered posterior that we can
    write as:

    .. math::

        p(\theta \mid y)_{\beta} = p(y \mid \theta)^{\beta} p(\theta)

    A summary of the algorithm is:

     1. Initialize :math:`\beta` at zero and stage at zero.
     2. Generate N samples :math:`S_{\beta}` from the prior (because when :math:`\beta = 0` the
        tempered posterior is the prior).
     3. Increase :math:`\beta` in order to make the effective sample size equal some predefined
        value (we use :math:`Nt`, where :math:`t` is 0.5 by default).
     4. Compute a set of N importance weights W. The weights are computed as the ratio of the
        likelihoods of a sample at stage i+1 and stage i.
     5. Obtain :math:`S_{w}` by re-sampling according to W.
     6. Use W to compute the mean and covariance for the proposal distribution, a MVNormal.
     7. For stages other than 0 use the acceptance rate from the previous stage to estimate
        `n_steps`.
     8. Run N independent Metropolis-Hastings (IMH) chains (each one of length `n_steps`),
        starting each one from a different sample in :math:`S_{w}`. Samples are IMH as the
        proposal mean is the mean of the previous posterior stage and not the current point in
        parameter space.
     9. Repeat from step 3 until :math:`\beta \ge 1`.
     10. The final result is a collection of N samples from the posterior.


    References
    ----------
    .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013),
        Bayesian inversion for finite fault earthquake source models I- Theory and algorithm.
        Geophysical Journal International, 2013, 194(3), pp.1701-1726,
        `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__

    .. [Ching2007] Ching, J. and Chen, Y. (2007).
        Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class
        Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816),
        816-832. `link <http://ascelibrary.org/doi/abs/10.1061/%28ASCE%290733-9399
        %282007%29133:7%28816%29>`__
    """
    _log = logging.getLogger("pymc3")
    _log.info("Initializing SMC sampler...")

    model = modelcontext(model)
    if model.name:
        raise NotImplementedError(
            "The SMC implementation currently does not support named models. "
            "See https://github.com/pymc-devs/pymc3/pull/4365.")
    if cores is None:
        cores = _cpu_count()

    if chains is None:
        chains = max(2, cores)
    elif chains == 1:
        cores = 1

    _log.info(f"Sampling {chains} chain{'s' if chains > 1 else ''} "
              f"in {cores} job{'s' if cores > 1 else ''}")

    if random_seed == -1:
        random_seed = None
    if chains == 1 and isinstance(random_seed, int):
        random_seed = [random_seed]
    if random_seed is None or isinstance(random_seed, int):
        if random_seed is not None:
            np.random.seed(random_seed)
        random_seed = [np.random.randint(2**30) for _ in range(chains)]
    if not isinstance(random_seed, Iterable):
        raise TypeError(
            "Invalid value for `random_seed`. Must be tuple, list or int")

    if kernel.lower() == "abc":
        if len(model.observed_RVs) != 1:
            warnings.warn(
                "SMC-ABC only works properly with models with one observed variable"
            )
        if model.potentials:
            _log.info("Potentials will be added to the prior term")

    params = (
        draws,
        kernel,
        n_steps,
        start,
        tune_steps,
        p_acc_rate,
        threshold,
        save_sim_data,
        save_log_pseudolikelihood,
        model,
    )

    t1 = time.time()
    if parallel and chains > 1:
        loggers = [_log] + [None] * (chains - 1)
        pool = mp.Pool(cores)
        results = pool.starmap(sample_smc_int,
                               [(*params, random_seed[i], i, loggers[i])
                                for i in range(chains)])

        pool.close()
        pool.join()
    else:
        results = []
        for i in range(chains):
            results.append(sample_smc_int(*params, random_seed[i], i, _log))

    (
        traces,
        sim_data,
        log_marginal_likelihoods,
        log_pseudolikelihood,
        betas,
        accept_ratios,
        nsteps,
    ) = zip(*results)
    trace = MultiTrace(traces)
    trace.report._n_draws = draws
    trace.report._n_tune = 0
    trace.report.log_marginal_likelihood = np.array(log_marginal_likelihoods)
    trace.report.log_pseudolikelihood = log_pseudolikelihood
    trace.report.betas = betas
    trace.report.accept_ratios = accept_ratios
    trace.report.nsteps = nsteps
    trace.report._t_sampling = time.time() - t1

    if save_sim_data:
        return trace, {
            modelcontext(model).observed_RVs[0].name: np.array(sim_data)
        }
    else:
        return trace
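
# Hedged usage sketch (illustrative model, not from the original source): with a
# model on the context stack, sample_smc returns a MultiTrace whose report carries
# the per-chain log marginal likelihood estimates.
import numpy as np
import pymc3 as pm

with pm.Model() as smc_model:
    mu = pm.Normal('mu', mu=0.0, sigma=5.0)
    pm.Normal('obs', mu=mu, sigma=1.0, observed=np.random.randn(100))
    smc_trace = sample_smc(draws=1000, kernel="metropolis", parallel=False)

print(smc_trace.report.log_marginal_likelihood)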
Example No. 18
0
def opt_nfo(
        #Optimization parameters
        #initialization
        n0=10,  #int, n0 the initial number of draws
        init_samples=None,  #array, Whether to provide some pre-defined sequence or do pymc3 sampling
        #approximation
        k_trunc=np.inf,  #IW clipping, not used by default
        eps_z=0.01,  #float, tolerance on Z for q iter convergence (eps') #currently not used since not iterating SINF unless trainable
        nf_iter=1,  #int, number of NF iters -should always be 1 in our implementation
        #annealing
        N=10,  #int, N the TOTAL number of draws we want at each iteration - this is no longer used, is from when we used to run multiple fits
        t_ess=0.5,  #float, ESS<t_ess*n0 t threshold on ESS for ESS3 (no longer temperature)
        g_AF=0,  #float, size of gradient contribution to AF, not used now
        #exploration
        N_AF=1000,  #int,number of points to use in q_w sampling for AF
        expl_top_AF=1,  #int,cut for the top AF at a given temp level accepted at each beta
        expl_latent=0,  #int,latent draw from around top IW1 or around random draw from q_w, accepted at each step
        expl_top_qw=0,  #int,keep top q_w at this iteration
        beta_max=1,  #float>0,highest exponent on tempered posterior, support >1 for exploitation
        rel_beta=1,  #0<float<1, β2 = rel_beta*β, where β2 is the lower temp level used for sampling q_w, what we call 'X'
        frac_rel_beta_AF=1,  #int, the modifier to the AF used to up/down-weight the w vs uw contribution, what we call "Y"
        latent_sigma=None,  #float, the value of l
        use_latent_beta2=False,  #whether to get the latent sample from q_w(β2) or from q_uw
        use_pq_beta_IW1=False,  #whether to get the latent sample from near top IW1 or randomly from q_w
        bounds=None,  #array, size 2xd, bounding box for samples FIXME make this more obvious, needed for prior
        N_temp=25,  #int, cutoff on number of allowed temp iterations before giving up -> #FIXME eventually make this throw error
        #NF parameters
        model=None,
        frac_validate=0.0,
        iteration=None,
        alpha_w=(0, 0),
        alpha_uw=(0, 0),
        verbose=False,
        n_component=None,
        interp_nbin=None,
        KDE=True,
        bw_factor_min=1.0,
        bw_factor_max=1.0,
        bw_factor_num=1,
        rel_bw=1,
        edge_bins=None,
        ndata_wT=None,
        MSWD_max_iter=None,
        NBfirstlayer=True,
        logit=False,
        Whiten=False,
        trainable_qw=False,  #whether to improve our q_w at each beta iteration with SGD
        sgd_steps=0,  #number of steps used in Adam when training trainable q_w
        knots_trainable=5,
        batchsize=None,
        nocuda=False,
        patch=False,
        shape=[28, 28, 1],
        #Runtime
        random_seed=-1,
        parallel=False,
        cores=None):
    r"""
    Normalizing flow-based Bayesian Optimization.

    Parameters
    ----------
    n0: int
        The initial number of draws. Defaults to 10.
    init_samples: array, optional
        Pre-defined initial samples. When None (default) the initial draws are obtained with
        pymc3 sampling.
    eps_z: float
        Tolerance on the evidence estimate Z used to decide convergence of the q iterations.
    nf_iter: int
        Number of NF fit iterations; should always be 1 in this implementation.
    model: Model (optional if in ``with`` context).
    frac_validate: float
        Fraction of the points at each iteration that we use for validation of the NF fit.
    alpha_w, alpha_uw: tuple of floats
        Regularization parameters used for the weighted and unweighted NF fits.
    verbose: boolean
        Whether you want verbose output from the NF fit.
    random_seed: int
        random seed
    parallel: bool
        Distribute computations across cores if the number of cores is larger than 1.
        Defaults to False.
    cores : int
        Number of cores available for the optimization step. Defaults to None, in which case the CPU
        count is used.

    """

    _log = logging.getLogger("pymc3")
    _log.info("Initializing normalizing flow-based optimization...")

    model = modelcontext(model)
    if model.name:
        raise NotImplementedError(
            "The NS_NFO implementation currently does not support named models. "
            "See https://github.com/pymc-devs/pymc3/pull/4365.")
    if cores is None:
        cores = _cpu_count()
    chains = 1

    _log.info(f"Sampling {chains} chain{'s' if chains > 1 else ''} "
              f"Cores available for optimization: {cores}")
    if random_seed == -1:
        random_seed = None
    if chains == 1 and isinstance(random_seed, int):
        random_seed = [random_seed]
    if random_seed is None or isinstance(random_seed, int):
        if random_seed is not None:
            np.random.seed(random_seed)
        random_seed = [np.random.randint(2**30) for _ in range(chains)]
    if not isinstance(random_seed, Iterable):
        raise TypeError(
            "Invalid value for `random_seed`. Must be tuple, list or int")

    #we changed the name for end-user-facing readability, but internally more familiar with these names
    aN, bN, cN, dN = N_AF, expl_top_AF, expl_latent, expl_top_qw

    params = (
        n0,
        init_samples,
        k_trunc,
        eps_z,
        nf_iter,
        N,
        t_ess,
        g_AF,
        aN,
        bN,
        cN,
        dN,
        beta_max,
        rel_beta,
        frac_rel_beta_AF,
        latent_sigma,
        use_latent_beta2,
        use_pq_beta_IW1,
        bounds,
        N_temp,
        model,
        frac_validate,
        iteration,
        alpha_w,
        alpha_uw,
        cores,
        verbose,
        n_component,
        interp_nbin,
        KDE,
        bw_factor_min,
        bw_factor_max,
        bw_factor_num,
        rel_bw,
        edge_bins,
        ndata_wT,
        MSWD_max_iter,
        NBfirstlayer,
        logit,
        Whiten,
        trainable_qw,
        sgd_steps,
        knots_trainable,
        batchsize,
        nocuda,
        patch,
        shape,
        parallel,
    )

    t1 = time.time()

    results = []
    for i in range(chains):
        results.append(opt_nfo_int(*params, random_seed[i], i, _log))
    (
        traces,
        log_evidence,
        q_samples,
        importance_weights,
        logp,
        logq,
        train_logp,
        train_logq,
        logZ,
        q_models,
        q_ess,
        total_ess,
        min_var_bws,
        min_pq_bws,
        betas,
    ) = zip(*results)
    trace = MultiTrace(traces)
    trace.report.log_evidence = log_evidence
    trace.report.q_samples = q_samples
    trace.report.importance_weights = importance_weights
    trace.report.logp = logp
    trace.report.logq = logq
    trace.report.train_logp = train_logp
    trace.report.train_logq = train_logq
    trace.report.logZ = logZ
    trace.report.q_models = q_models
    trace.report.q_ess = q_ess
    trace.report.total_ess = total_ess
    trace.report.N = N
    trace.report.min_var_bws = min_var_bws
    trace.report.min_pq_bws = min_pq_bws
    trace.report._t_sampling = time.time() - t1
    trace.report.betas = betas
    return trace
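
# Hedged usage sketch (not from the original source): parameter values are
# illustrative and `bounds` follows the 2 x d bounding-box convention noted in the
# signature comments above; the returned trace exposes log_evidence on its report.
import numpy as np
import pymc3 as pm

with pm.Model() as nfo_model:
    x = pm.Uniform('x', lower=-5.0, upper=5.0)
    pm.Potential('loglike', -0.5 * x ** 2)
    nfo_trace = opt_nfo(n0=50, N_AF=500, t_ess=0.5,
                        bounds=np.array([[-5.0], [5.0]]),
                        KDE=True, verbose=False)

print(nfo_trace.report.log_evidence)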
Example No. 19
0
def sample_ns_nfmc(
    draws=2000,
    start=None,
    rho=0.01,
    epsilon=0.01,
    model=None,
    frac_validate=0.8,
    alpha=(0,0),
    verbose=False,
    random_seed=-1,
    parallel=False,
    chains=None,
    cores=None,
):
    r"""
    Normalizing flow based nested sampling.

    Parameters
    ----------
    draws: int
        The number of samples to draw from the posterior (i.e. the last stage), which is also
        the number of independent chains. Defaults to 2000.
    start: dict, or array of dict
        Starting point in parameter space. It should be a list of dict with length `chains`.
        When None (default) the starting point is sampled from the prior distribution.
    rho: float
        Sets fraction of points we want to be above the likelihood threshold at each iteration.
        Used to adaptively set the likelihood threshold during sampling.
    epsilon: float
        Stopping factor for the algorithm. At each iteration we compare the ratio of the evidences
        from the current and previous iterations. If it is less than 1-epsilon we stop.
    model: Model (optional if in ``with`` context).
    frac_validate: float
        Fraction of the live points at each NS iteration that we use for validation of the NF fit.
    alpha: tuple of floats
        Regularization parameters used for the NF fit. 
    verbose: boolean
        Whether you want verbose output from the NF fit.
    random_seed: int
        random seed
    parallel: bool
        Distribute computations across cores if the number of cores is larger than 1.
        Defaults to False.
    cores : int
        The number of chains to run in parallel. If ``None``, set to the number of CPUs in the
        system, but at most 4.
    chains : int
        The number of chains to sample. Running independent chains is important for some
        convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever
        is larger.

    """
    _log = logging.getLogger("pymc3")
    _log.info("Initializing normalizing flow based nested sampling...")

    model = modelcontext(model)
    if model.name:
        raise NotImplementedError(
            "The NS_NFMC implementation currently does not support named models. "
            "See https://github.com/pymc-devs/pymc3/pull/4365."
        )
    if cores is None:
        cores = _cpu_count()

    if chains is None:
        chains = max(2, cores)
    elif chains == 1:
        cores = 1

    _log.info(
        f"Sampling {chains} chain{'s' if chains > 1 else ''} "
        f"in {cores} job{'s' if cores > 1 else ''}"
    )

    if random_seed == -1:
        random_seed = None
    if chains == 1 and isinstance(random_seed, int):
        random_seed = [random_seed]
    if random_seed is None or isinstance(random_seed, int):
        if random_seed is not None:
            np.random.seed(random_seed)
        random_seed = [np.random.randint(2 ** 30) for _ in range(chains)]
    if not isinstance(random_seed, Iterable):
        raise TypeError("Invalid value for `random_seed`. Must be tuple, list or int")

    params = (
        draws,
        start,
        rho,
        epsilon,
        model,
        frac_validate,
        alpha,
        verbose,
    )

    t1 = time.time()
    if parallel and chains > 1:
        loggers = [_log] + [None] * (chains - 1)
        pool = mp.Pool(cores)
        results = pool.starmap(
            sample_ns_nfmc_int, [(*params, random_seed[i], i, loggers[i]) for i in range(chains)]
        )

        pool.close()
        pool.join()
    else:
        results = []
        for i in range(chains):
            results.append(sample_ns_nfmc_int(*params, random_seed[i], i, _log))

    (
        traces,
        log_evidence,
        log_evidences,
        likelihood_logp_thresh,
    ) = zip(*results)
    trace = MultiTrace(traces)
    trace.report._n_draws = draws
    trace.report.log_evidence = np.array(log_evidence)
    trace.report._t_sampling = time.time() - t1

    return trace
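
# Hedged usage sketch (illustrative model, not from the original source): the
# returned MultiTrace exposes the nested-sampling evidence estimate on its report.
import numpy as np
import pymc3 as pm

with pm.Model() as ns_model:
    mu = pm.Normal('mu', mu=0.0, sigma=5.0)
    pm.Normal('obs', mu=mu, sigma=1.0, observed=np.random.randn(100))
    ns_trace = sample_ns_nfmc(draws=1000, rho=0.01, epsilon=0.01, parallel=False)

print(ns_trace.report.log_evidence)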
Example No. 20
0
def _iter_sample(draws,
                 step,
                 start=None,
                 trace=None,
                 chain=0,
                 tune=None,
                 model=None,
                 random_seed=-1,
                 overwrite=True,
                 update_proposal=False,
                 keep_last=False):
    """
    Modified from :func:`pymc3.sampling._iter_sample`

    tune: int
        adaptive step-size scaling is stopped after this chain sample
    """

    model = modelcontext(model)

    draws = int(draws)

    if draws < 1:
        raise ValueError('Argument `draws` should be above 0.')

    if start is None:
        start = {}

    if random_seed != -1:
        seed(random_seed)

    try:
        step = CompoundStep(step)
    except TypeError:
        pass

    point = Point(start, model=model)

    step.chain_index = chain

    trace.setup(draws, chain, overwrite=overwrite)
    for i in range(draws):
        if i == tune:
            step = stop_tuning(step)

        logger.debug('Step: Chain_%i step_%i' % (chain, i))
        point, out_list = step.step(point)

        try:
            trace.buffer_write(out_list, step.cumulative_samples)
        except BufferError:  # buffer full
            last_sample = deepcopy(trace.buffer[-1])
            if update_proposal:  # only valid for PT for now
                if step.proposal_name in multivariate_proposals:
                    cov = trace.get_sample_covariance(step)
                    if cov is not None:
                        if not isinstance(trace, MemoryChain):
                            filename = '%s/proposal_cov_chain_%i_%i.%s' % (
                                trace.dir_path, trace.chain, trace.cov_counter,
                                'png')
                            from matplotlib import pyplot as plt
                            fig, axs = plt.subplots(1, 1)
                            im = axs.imshow(cov, aspect='auto')
                            plt.colorbar(im)
                            fig.savefig(filename, dpi=150)
                            plt.close(fig)

                        step.proposal_dist = choose_proposal(
                            step.proposal_name, scale=cov)

            trace.record_buffer()
            if keep_last:
                # put last sample back
                trace.buffer_write(*last_sample)

        yield trace
Example No. 21
0
    def __init__(
        self,
        *,
        trace=None,
        prior=None,
        posterior_predictive=None,
        log_likelihood=True,
        predictions=None,
        coords: Optional[CoordSpec] = None,
        dims: Optional[DimSpec] = None,
        model=None,
        save_warmup: Optional[bool] = None,
        density_dist_obs: bool = True,
        index_origin: Optional[int] = None,
    ):

        self.save_warmup = rcParams[
            "data.save_warmup"] if save_warmup is None else save_warmup
        self.trace = trace

        # this permits us to get the model from command-line argument or from with model:
        self.model = modelcontext(model)

        self.attrs = None
        if trace is not None:
            self.nchains = trace.nchains if hasattr(trace, "nchains") else 1
            if hasattr(trace.report,
                       "n_draws") and trace.report.n_draws is not None:
                self.ndraws = trace.report.n_draws
                self.attrs = {
                    "sampling_time": trace.report.t_sampling,
                    "tuning_steps": trace.report.n_tune,
                }
            else:
                self.ndraws = len(trace)
                if self.save_warmup:
                    warnings.warn(
                        "Warmup samples will be stored in posterior group and will not be"
                        " excluded from stats and diagnostics."
                        " Do not slice the trace manually before conversion",
                        UserWarning,
                    )
            self.ntune = len(self.trace) - self.ndraws
            self.posterior_trace, self.warmup_trace = self.split_trace()
        else:
            self.nchains = self.ndraws = 0

        self.prior = prior
        self.posterior_predictive = posterior_predictive
        self.log_likelihood = log_likelihood
        self.predictions = predictions
        self.index_origin = rcParams[
            "data.index_origin"] if index_origin is None else index_origin

        def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray:
            return next(iter(dct.values()))

        if trace is None:
            # if you have a posterior_predictive built with keep_dims,
            # you'll lose here, but there's nothing I can do about that.
            self.nchains = 1
            get_from = None
            if predictions is not None:
                get_from = predictions
            elif posterior_predictive is not None:
                get_from = posterior_predictive
            elif prior is not None:
                get_from = prior
            if get_from is None:
                # pylint: disable=line-too-long
                raise ValueError(
                    "When constructing InferenceData must have at least"
                    " one of trace, prior, posterior_predictive or predictions."
                )

            aelem = arbitrary_element(get_from)
            self.ndraws = aelem.shape[0]

        self.coords = {} if coords is None else coords
        if hasattr(self.model, "coords"):
            self.coords = {**self.model.coords, **self.coords}
        self.coords = {
            key: value
            for key, value in self.coords.items() if value is not None
        }

        self.dims = {} if dims is None else dims
        if hasattr(self.model, "RV_dims"):
            model_dims = {
                var_name: [dim for dim in dims if dim is not None]
                for var_name, dims in self.model.RV_dims.items()
            }
            self.dims = {**model_dims, **self.dims}

        self.density_dist_obs = density_dist_obs
        self.observations = self.find_observations()
Example No. 22
0
File: smc.py Project: shineusn/beat
def smc_sample(
        n_steps, step=None, start=None, homepath=None, chain=0,
        stage=0, n_jobs=1, tune=None, progressbar=False, buffer_size=5000,
        model=None, update=None, random_seed=None, rm_flag=False):
    """
    Sequential Monte Carlo sampling

    Samples the solution space with n_chains of Metropolis chains, where each
    chain has n_steps iterations. Once finished, the sampled traces are
    evaluated:

    (1) Based on the likelihoods of the final samples, chains are weighted
    (2) the weighted covariance of the ensemble is calculated and set as new
        proposal distribution
    (3) the variation in the ensemble is calculated and the next tempering
        parameter (beta) calculated
    (4) New n_chains Metropolis chains are seeded on the traces with high
        weight for n_steps iterations
    (5) Repeat until beta > 1.

    Parameters
    ----------
    n_steps : int
        The number of samples to draw for each Markov-chain per stage
    step : :class:`SMC`
        SMC initialisation object
    start : List of dictionaries
        with length of (n_chains)
        Starting points in parameter space (or partial point)
        Defaults to random draws from variables (defaults to empty dict)
    chain : int
        Chain number used to store sample in backend. If `n_jobs` is
        greater than one, chain numbers will start here.
    stage : int
        Stage where to start or continue the calculation. It is possible to
        continue after completed stages (stage should be the number of the
        completed stage + 1). If None the start will be at stage = 0.
    n_jobs : int
        The number of cores to be used in parallel. Be aware that theano has
        internal parallelisation. Sometimes this is more efficient especially
        for simple models.
        step.n_chains / n_jobs has to be an integer number!
    tune : int
        Number of iterations to tune, if applicable (defaults to None)
    homepath : string
        Result_folder for storing stages, will be created if not existing.
    progressbar : bool
        Flag for displaying a progress bar
    buffer_size : int
        this is the number of samples after which the buffer is written to disk
        or if the chain end is reached
    model : :class:`pymc3.Model`
        (optional if in `with` context) has to contain deterministic
        variable name defined under step.likelihood_name' that contains the
        model likelihood
    update : :py:class:`models.Problem`
        Problem object that contains all the observed data and (if applicable)
        covariances to be updated each transition step.
    rm_flag : bool
        If True existing stage result folders are being deleted prior to
        sampling.

    References
    ----------
    .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013),
        Bayesian inversion for finite fault earthquake source models
        I- Theory and algorithm. Geophysical Journal International, 2013,
        194(3), pp.1701-1726,
        `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__
    """

    model = modelcontext(model)

    if step is None:
        raise TypeError('Argument `step` has to be a SMC step object.')

    step.n_steps = int(n_steps)

    if n_steps < 1:
        raise ValueError('Argument `n_steps` should be above 0.')

    if homepath is None:
        raise TypeError(
            'Argument `homepath` should be path to result_directory.')

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise ValueError('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise TypeError('Argument `start` should have dicts equal the '
                            'number of chains (step.N-chains)')
        else:
            step.population = start

    if not any(
            step.likelihood_name in var.name for var in model.deterministics):
        raise TypeError('Model (deterministic) variables need to contain '
                        'a variable %s '
                        'as defined in `step`.' % step.likelihood_name)

    stage_handler = backend.TextStage(homepath)

    chains, step, update = init_stage(
        stage_handler=stage_handler,
        step=step,
        stage=stage,
        progressbar=progressbar,
        update=update,
        model=model,
        rm_flag=rm_flag)

    with model:
        while step.beta < 1.:
            if step.stage == 0:
                # Initial stage
                logger.info('Sample initial stage: ...')
                draws = 1
            else:
                draws = n_steps

            logger.info('Beta: %f Stage: %i' % (step.beta, step.stage))

            # Metropolis sampling intermediate stages
            chains = stage_handler.clean_directory(step.stage, chains, rm_flag)

            sample_args = {
                'draws': draws,
                'step': step,
                'stage_path': stage_handler.stage_path(step.stage),
                'progressbar': progressbar,
                'model': model,
                'n_jobs': n_jobs,
                'chains': chains,
                'buffer_size': buffer_size}

            mtrace = iter_parallel_chains(**sample_args)

            step.population, step.array_population, step.likelihoods = \
                step.select_end_points(mtrace)

            if update is not None:
                logger.info('Updating Covariances ...')
                mean_pt = step.mean_end_points()
                update.update_weights(mean_pt, n_jobs=n_jobs)
                mtrace = update_last_samples(
                    homepath, step, progressbar, model, n_jobs, rm_flag)
                step.population, step.array_population, step.likelihoods = \
                    step.select_end_points(mtrace)

            step.beta, step.old_beta, step.weights = step.calc_beta()

            if step.beta > 1.:
                logger.info('Beta > 1.: %f' % step.beta)
                step.beta = 1.
                outparam_list = [step.get_sampler_state(), update]
                stage_handler.dump_atmip_params(step.stage, outparam_list)
                if stage == -1:
                    chains = []
                else:
                    chains = None
            else:
                step.covariance = step.calc_covariance()
                step.proposal_dist = choose_proposal(
                    step.proposal_name, scale=step.covariance)
                step.resampling_indexes = step.resample()
                step.chain_previous_lpoint = \
                    step.get_chain_previous_lpoint(mtrace)

                outparam_list = [step.get_sampler_state(), update]
                stage_handler.dump_atmip_params(step.stage, outparam_list)

                step.stage += 1
                del(mtrace)

        # Metropolis sampling final stage
        logger.info('Sample final stage')
        step.stage = -1

        temp = np.exp((1 - step.old_beta) *
                      (step.likelihoods - step.likelihoods.max()))
        step.weights = temp / np.sum(temp)
        step.covariance = step.calc_covariance()
        step.proposal_dist = choose_proposal(
            step.proposal_name, scale=step.covariance)

        step.resampling_indexes = step.resample()
        step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

        sample_args['step'] = step
        sample_args['stage_path'] = stage_handler.stage_path(step.stage)
        sample_args['chains'] = chains
        iter_parallel_chains(**sample_args)

        outparam_list = [step.get_sampler_state(), update]
        stage_handler.dump_atmip_params(step.stage, outparam_list)
        logger.info('Finished sampling!')
Example No. 23
0
def sample(
    *,
    draws=1000,
    tune=1000,
    model=None,
    step_kwargs=None,
    warmup_window=50,
    adapt_window=50,
    cooldown_window=100,
    initial_accept=None,
    target_accept=0.9,
    gamma=0.05,
    k=0.75,
    t0=10,
    **kwargs,
):
    # Check that we're in a model context and that all the variables are
    # continuous
    model = modelcontext(model)
    if not all_continuous(model.vars):
        raise ValueError("NUTS can only be used for models with only "
                         "continuous variables.")
    start = kwargs.get("start", None)
    if start is None:
        start = model.test_point
    mean = model.dict_to_array(start)

    update_steps = build_schedule(
        tune,
        warmup_window=warmup_window,
        adapt_window=adapt_window,
        cooldown_window=cooldown_window,
    )

    potential = QuadPotentialDenseAdapt(
        model.ndim,
        initial_mean=mean,
        initial_weight=10,
        update_steps=update_steps,
    )

    if "step" in kwargs:
        step = kwargs["step"]
    else:
        if step_kwargs is None:
            step_kwargs = {}
        step = pm.NUTS(
            potential=potential,
            model=model,
            target_accept=target_accept,
            **step_kwargs,
        )

    if "target_accept" in step_kwargs and target_accept is not None:
        raise ValueError(
            "'target_accept' cannot be given as a keyword argument and in "
            "'step_kwargs'")
    target_accept = step_kwargs.pop("target_accept", target_accept)
    if initial_accept is None:
        target = target_accept
    else:
        if initial_accept > target_accept:
            raise ValueError(
                "initial_accept must be less than or equal to target_accept")
        target = initial_accept + (target_accept - initial_accept) * np.sqrt(
            np.arange(len(update_steps)) / (len(update_steps) - 1))
    step.step_adapt = WindowedDualAverageAdaptation(update_steps,
                                                    step.step_size, target,
                                                    gamma, k, t0)

    kwargs["step"] = step
    return pm.sample(draws=draws, tune=tune, model=model, **kwargs)
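
# Hedged usage sketch (not from the original source): inside a model context this
# wrapper builds a NUTS step with a windowed dense mass-matrix adaptation schedule
# and then delegates to pm.sample; the model below is purely illustrative.
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', mu=0.0, sigma=1.0)
    pm.Normal('obs', mu=mu, sigma=1.0, observed=np.random.randn(100))
    windowed_trace = sample(draws=1000, tune=2000, target_accept=0.9)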
Example No. 24
0
    def __init__(self,
                 vars=None,
                 out_vars=None,
                 covariance=None,
                 scale=1.,
                 n_chains=100,
                 tune=True,
                 tune_interval=100,
                 model=None,
                 check_bound=True,
                 likelihood_name='like',
                 proposal_name='MultivariateNormal',
                 coef_variation=1.,
                 **kwargs):

        model = modelcontext(model)

        if vars is None:
            vars = model.vars

        vars = inputvars(vars)

        if out_vars is None:
            out_vars = model.unobserved_RVs

        out_varnames = [out_var.name for out_var in out_vars]

        self.scaling = np.atleast_1d(scale)

        if covariance is None and proposal_name == 'MultivariateNormal':
            self.covariance = np.eye(sum(v.dsize for v in vars))
            scale = self.covariance

        self.tune = tune
        self.check_bnd = check_bound
        self.tune_interval = tune_interval
        self.steps_until_tune = tune_interval

        self.proposal_name = proposal_name
        self.proposal_dist = choose_proposal(self.proposal_name, scale=scale)

        self.proposal_samples_array = self.proposal_dist(n_chains)

        self.stage_sample = 0
        self.accepted = 0

        self.beta = 0
        self.stage = 0
        self.chain_index = 0
        self.resampling_indexes = np.arange(n_chains)

        self.coef_variation = coef_variation
        self.n_chains = n_chains
        self.likelihoods = np.zeros(n_chains)

        self.likelihood_name = likelihood_name
        self._llk_index = out_varnames.index(likelihood_name)
        self.discrete = np.concatenate(
            [[v.dtype in discrete_types] * (v.dsize or 1) for v in vars])
        self.any_discrete = self.discrete.any()
        self.all_discrete = self.discrete.all()

        # create initial population
        self.population = []
        self.array_population = np.zeros(n_chains)
        for i in range(self.n_chains):
            dummy = pm.Point({v.name: v.random() for v in vars}, model=model)
            self.population.append(dummy)

        self.population[0] = model.test_point

        self.chain_previous_lpoint = copy.deepcopy(self.population)

        shared = make_shared_replacements(vars, model)
        self.logp_forw = logp_forw(out_vars, vars, shared)
        self.check_bnd = logp_forw([model.varlogpt], vars, shared)

        super(ATMCMC, self).__init__(vars, out_vars, shared)
Example No. 25
0
def metropolis_sample(n_steps=10000,
                      homepath=None,
                      start=None,
                      backend='csv',
                      progressbar=False,
                      rm_flag=False,
                      buffer_size=5000,
                      buffer_thinning=1,
                      step=None,
                      model=None,
                      n_jobs=1,
                      update=None,
                      burn=0.5,
                      thin=2):
    """
    Execute Metropolis algorithm repeatedly depending on the number of chains.
    """

    # hardcoded stage here as there are no stages
    stage = 1
    model = modelcontext(model)
    if step is None:
        raise TypeError('Argument `step` has to be a Metropolis step object.')

    step.n_steps = int(n_steps)

    if n_steps < 1:
        raise ValueError('Argument `n_steps` should be above 0.')

    if homepath is None:
        raise TypeError(
            'Argument `homepath` should be path to result_directory.')

    if n_jobs > 1:
        if not (step.n_chains / float(n_jobs)).is_integer():
            raise Exception('n_chains / n_jobs has to be a whole number!')

    if start is not None:
        if len(start) != step.n_chains:
            raise Exception('Argument `start` should have dicts equal the '
                            'number of chains (step.N-chains)')
        else:
            step.population = start

    if not any(step.likelihood_name in var.name
               for var in model.deterministics):
        raise Exception('Model (deterministic) variables need to contain '
                        'a variable %s '
                        'as defined in `step`.' % step.likelihood_name)

    stage_handler = backend.SampleStage(homepath, backend=step.backend)

    util.ensuredir(homepath)

    chains, step, update = init_stage(
        stage_handler=stage_handler,
        step=step,
        stage=0,  # needs zero otherwise tries to load stage_0 results
        progressbar=progressbar,
        update=update,
        model=model,
        rm_flag=rm_flag)

    with model:

        chains = stage_handler.clean_directory(step.stage, chains, rm_flag)

        logger.info('Sampling stage ...')

        draws = n_steps

        step.stage = stage

        sample_args = {
            'draws': draws,
            'step': step,
            'stage_path': stage_handler.stage_path(step.stage),
            'progressbar': progressbar,
            'model': model,
            'n_jobs': n_jobs,
            'buffer_size': buffer_size,
            'buffer_thinning': buffer_thinning,
            'chains': chains
        }

        mtrace = iter_parallel_chains(**sample_args)

        if step.proposal_name == 'MultivariateNormal':
            pdict, step.covariance = get_trace_stats(mtrace, step, burn, thin)

            step.proposal_dist = choose_proposal(step.proposal_name,
                                                 scale=step.covariance)

        if update is not None:
            logger.info('Updating Covariances ...')
            update.update_weights(pdict['dist_mean'], n_jobs=n_jobs)

            mtrace = update_last_samples(homepath, step, progressbar, model,
                                         n_jobs, rm_flag)

        elif update is not None and stage == 0:
            update.engine.close_cashed_stores()

        step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace)

        outparam_list = [step.get_sampler_state(), update]
        stage_handler.dump_atmip_params(step.stage, outparam_list)

        # get_final_stage(homepath, n_stages, model=model)
        return stage_handler.load_multitrace(step.stage, model=model)
Example No. 26
0
    def __init__(self,
                 vars=None,
                 scaling=None,
                 step_scale=0.25,
                 is_cov=False,
                 model=None,
                 blocked=True,
                 potential=None,
                 dtype=None,
                 Emax=1000,
                 target_accept=0.8,
                 gamma=0.05,
                 k=0.75,
                 t0=10,
                 adapt_step_size=True,
                 step_rand=None,
                 **aesara_kwargs):
        """Set up Hamiltonian samplers with common structures.

        Parameters
        ----------
        vars: list of aesara variables
        scaling: array_like, ndim = {1,2}
            Scaling for momentum distribution. 1d arrays interpreted matrix
            diagonal.
        step_scale: float, default=0.25
            Size of steps to take, automatically scaled down by 1/n**(1/4)
        is_cov: bool, default=False
            Treat scaling as a covariance matrix/vector if True, else treat
            it as a precision matrix/vector
        model: pymc3 Model instance
        blocked: bool, default=True
        potential: Potential, optional
            An object that represents the Hamiltonian with methods `velocity`,
            `energy`, and `random` methods.
        **aesara_kwargs: passed to aesara functions
        """
        self._model = modelcontext(model)

        if vars is None:
            vars = self._model.cont_vars
        vars = inputvars(vars)

        super().__init__(vars,
                         blocked=blocked,
                         model=model,
                         dtype=dtype,
                         **aesara_kwargs)

        self.adapt_step_size = adapt_step_size
        self.Emax = Emax
        self.iter_count = 0
        size = self._logp_dlogp_func.size

        self.step_size = step_scale / (size**0.25)
        self.step_adapt = step_sizes.DualAverageAdaptation(
            self.step_size, target_accept, gamma, k, t0)
        self.target_accept = target_accept
        self.tune = True

        if scaling is None and potential is None:
            mean = floatX(np.zeros(size))
            var = floatX(np.ones(size))
            potential = QuadPotentialDiagAdapt(size, mean, var, 10)

        if isinstance(scaling, dict):
            point = Point(scaling, model=model)
            scaling = guess_scaling(point, model=model, vars=vars)

        if scaling is not None and potential is not None:
            raise ValueError("Can not specify both potential and scaling.")

        if potential is not None:
            self.potential = potential
        else:
            self.potential = quad_potential(scaling, is_cov)

        self.integrator = integration.CpuLeapfrogIntegrator(
            self.potential, self._logp_dlogp_func)

        self._step_rand = step_rand
        self._warnings = []
        self._samples_after_tune = 0
        self._num_divs_sample = 0
Example No. 27
0
    def __init__(self,
                 vars=None,
                 out_vars=None,
                 covariance=None,
                 scale=1.,
                 n_chains=100,
                 tune=True,
                 tune_interval=100,
                 model=None,
                 check_bound=True,
                 likelihood_name='like',
                 backend='csv',
                 proposal_name='MultivariateNormal',
                 **kwargs):

        model = modelcontext(model)

        if vars is None:
            vars = model.vars

        vars = inputvars(vars)

        if out_vars is None:
            out_vars = model.unobserved_RVs

        out_varnames = [out_var.name for out_var in out_vars]

        self.scaling = utility.scalar2floatX(num.atleast_1d(scale))

        if covariance is None and proposal_name in multivariate_proposals:
            self.covariance = num.eye(sum(v.dsize for v in vars))
            scale = self.covariance
        elif covariance is None:
            scale = num.ones(sum(v.dsize for v in vars))
        else:
            scale = covariance

        self.tune = tune
        self.check_bound = check_bound
        self.tune_interval = tune_interval
        self.steps_until_tune = tune_interval

        self.proposal_name = proposal_name
        self.proposal_dist = choose_proposal(self.proposal_name, scale=scale)

        self.proposal_samples_array = self.proposal_dist(n_chains)

        self.stage_sample = 0
        self.accepted = 0

        self.beta = 1.
        self.stage = 0
        self.chain_index = 0

        # needed to use the same parallel implementation function as for SMC
        self.resampling_indexes = num.arange(n_chains)

        self.n_chains = n_chains

        self.likelihood_name = likelihood_name
        self._llk_index = out_varnames.index(likelihood_name)
        self.backend = backend
        self.discrete = num.concatenate(
            [[v.dtype in discrete_types] * (v.dsize or 1) for v in vars])
        self.any_discrete = self.discrete.any()
        self.all_discrete = self.discrete.all()

        # create initial population
        self.population = []
        self.array_population = num.zeros(n_chains)
        for i in range(self.n_chains):
            self.population.append(
                Point({v.name: v.random()
                       for v in vars}, model=model))

        self.population[0] = model.test_point

        shared = make_shared_replacements(vars, model)
        self.logp_forw = logp_forw(out_vars, vars, shared)
        self.check_bnd = logp_forw([model.varlogpt], vars, shared)

        super(Metropolis, self).__init__(vars, out_vars, shared)

        self.chain_previous_lpoint = [[]] * self.n_chains
        self._tps = None
Example No. 28
0
    def draw_value(self,
                   param,
                   trace: Optional[_TraceDict] = None,
                   givens=None):
        """Draw a set of random values from a distribution or return a constant.

        Parameters
        ----------
        param: number, array like, theano variable or pymc3 random variable
            The value or distribution. Constants or shared variables
            will be converted to an array and returned. Theano variables
            are evaluated. If `param` is a pymc3 random variable, draw
            values from it and return that (as ``np.ndarray``), unless a
            value is specified in the ``trace``.
        trace: pm.MultiTrace, optional
            A dictionary from pymc3 variable names to samples of their values
            used to provide context for evaluating ``param``.
        givens: dict, optional
            A dictionary from theano variables to their values. These values
            are used to evaluate ``param`` if it is a theano variable.
        """
        samples = self.samples

        def random_sample(
            meth: Callable[..., np.ndarray],
            param,
            point: _TraceDict,
            size: int,
            shape: Tuple[int, ...],
        ) -> np.ndarray:
            val = meth(point=point, size=size)
            try:
                assert val.shape == (size, ) + shape, (
                    "Sampling from random of %s yields wrong shape" % param)
            # error-quashing here is *extremely* ugly, but it seems to be what the logic in DensityDist wants.
            except AssertionError as e:
                if (hasattr(param, "distribution") and hasattr(
                        param.distribution, "wrap_random_with_dist_shape") and
                        not param.distribution.wrap_random_with_dist_shape):
                    pass
                else:
                    raise e

            return val

        if isinstance(param, (numbers.Number, np.ndarray)):
            return param
        elif isinstance(param, theano_constant):
            return param.value
        elif isinstance(param, tt.sharedvar.SharedVariable):
            return param.get_value()
        elif isinstance(param, (tt.TensorVariable, MultiObservedRV)):
            if hasattr(param,
                       "model") and trace and param.name in trace.varnames:
                return trace[param.name]
            elif hasattr(param, "random") and param.random is not None:
                model = modelcontext(None)
                assert isinstance(model, Model)
                shape: Tuple[int, ...] = tuple(_param_shape(param, model))
                return random_sample(param.random,
                                     param,
                                     point=trace,
                                     size=samples,
                                     shape=shape)
            elif (hasattr(param, "distribution")
                  and hasattr(param.distribution, "random")
                  and param.distribution.random is not None):
                if hasattr(param, "observations"):
                    # shape inspection for ObservedRV
                    dist_tmp = param.distribution
                    try:
                        distshape: Tuple[int, ...] = tuple(
                            param.observations.shape.eval())
                    except AttributeError:
                        distshape = tuple(param.observations.shape)

                    dist_tmp.shape = distshape
                    try:
                        return random_sample(
                            dist_tmp.random,
                            param,
                            point=trace,
                            size=samples,
                            shape=distshape,
                        )
                    except (ValueError, TypeError):
                        # reset shape to account for shape changes
                        # with theano.shared inputs
                        dist_tmp.shape = ()
                        # We want to draw values to infer the dist_shape,
                        # we don't want to store these drawn values to the context
                        with _DrawValuesContextBlocker():
                            point = trace[0] if trace else None
                            temp_val = np.atleast_1d(
                                dist_tmp.random(point=point, size=None))
                        # if hasattr(param, 'name') and param.name == 'obs':
                        #     import pdb; pdb.set_trace()
                        # Sometimes point may change the size of val but not the
                        # distribution's shape
                        if point and samples is not None:
                            temp_size = np.atleast_1d(samples)
                            if all(temp_val.shape[:len(temp_size)] ==
                                   temp_size):
                                dist_tmp.shape = tuple(
                                    temp_val.shape[len(temp_size):])
                            else:
                                dist_tmp.shape = tuple(temp_val.shape)
                        # I am not sure why I need to do this, but I do in order to trim off a
                        # degenerate dimension [2019/09/05:rpg]
                        if dist_tmp.shape[0] == 1 and len(dist_tmp.shape) > 1:
                            dist_tmp.shape = dist_tmp.shape[1:]
                        return random_sample(
                            dist_tmp.random,
                            point=trace,
                            size=samples,
                            param=param,
                            shape=tuple(dist_tmp.shape),
                        )
                else:  # has a distribution, but no observations
                    distshape = tuple(param.distribution.shape)
                    return random_sample(
                        meth=param.distribution.random,
                        param=param,
                        point=trace,
                        size=samples,
                        shape=distshape,
                    )
            # NOTE: I think the following is already vectorized.
            else:
                if givens:
                    variables, values = list(zip(*givens))
                else:
                    variables = values = []
                # Only the given variables that are ancestors of `param`
                # matter, so restrict the inputs to those.
                param_ancestors = set(
                    theano.gof.graph.ancestors([param],
                                               blockers=list(variables)))
                inputs = [(var, val) for var, val in zip(variables, values)
                          if var in param_ancestors]
                if inputs:
                    input_vars, input_vals = list(zip(*inputs))
                else:
                    input_vars = []
                    input_vals = []
                func = _compile_theano_function(param, input_vars)
                if not input_vars:
                    # if there are no input vars, there can be no input vals
                    assert input_vals == []
                    output = func(*input_vals)
                    if hasattr(output, "shape"):
                        val = np.repeat(np.expand_dims(output, 0),
                                        samples,
                                        axis=0)
                    else:
                        val = np.full(samples, output)

                else:
                    val = func(*input_vals)
                return val
        raise ValueError("Unexpected type in draw_value: %s" % type(param))
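The shape-inference fallback in the ObservedRV branch above reduces to: make one trial draw, strip the leading axes when they match the requested sample count, and keep the remainder as the distribution's shape. A standalone numpy sketch of that logic (the values are hypothetical; no pymc3 objects are involved):

import numpy as np

samples = 10                                  # requested number of draws
temp_val = np.atleast_1d(np.zeros((10, 3)))   # stands in for dist_tmp.random(...)
temp_size = np.atleast_1d(samples)

if all(temp_val.shape[:len(temp_size)] == temp_size):
    # leading axes are the sample axes; the rest is the distribution shape
    dist_shape = tuple(temp_val.shape[len(temp_size):])
else:
    dist_shape = tuple(temp_val.shape)

# drop a leading degenerate dimension, mirroring the branch above
if len(dist_shape) > 1 and dist_shape[0] == 1:
    dist_shape = dist_shape[1:]

print(dist_shape)   # (3,)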
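The final `else` branch, which handles plain deterministic quantities, can be illustrated with a minimal sketch (this is not pymc3's internal `_compile_theano_function`; all names below are made up): compile a Theano expression of the given ancestors into a callable, evaluate it once, and broadcast the result across the requested number of samples.

import numpy as np
import theano
import theano.tensor as tt

x = tt.vector("x")          # stands in for an ancestor supplied via `givens`
expr = (x * 2.0).sum()      # a deterministic expression playing the role of `param`

func = theano.function([x], expr)
output = func(np.ones(3, dtype=theano.config.floatX))  # evaluate once

samples = 4
if hasattr(output, "shape"):
    # array-like output: tile it along a new leading sample axis
    val = np.repeat(np.expand_dims(output, 0), samples, axis=0)
else:
    # plain Python scalar: fill an array of length `samples`
    val = np.full(samples, output)

print(val)   # four copies of the single evaluated value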
Example No. 29
0
def find_MAP(start=None,
             vars=None,
             method="L-BFGS-B",
             return_raw=False,
             include_transformed=True,
             progressbar=True,
             maxeval=5000,
             model=None,
             *args,
             **kwargs):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc3.sample()`` and it will automatically initialize NUTS in a better
    way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm. Defaults to 'L-BFGS-B' unless discrete
        variables are specified in `vars`, in which case 'Powell' is used,
        as it handles them better. For instructions on the use of a
        callable, refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional, defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional, defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler, but
    this is not an effective way of choosing starting values for sampling.
    NUTS initialization has since been greatly improved and is handled
    automatically inside ``pymc3.sample()``, so this method should be
    avoided for that purpose. A minimal usage sketch follows this example.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)
    start = copy.deepcopy(start)
    if start is None:
        start = model.initial_point
    else:
        model.update_start_vals(start, model.initial_point)
    model.check_start_vals(start)

    start = Point(start, model=model)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars(
            x, x0.point_map_info))

    try:
        # Build the gradient graph eagerly so that a missing or undefined
        # gradient is caught here rather than at the first call to `dlogp_func`.
        fastdlogp_nojac = model.fastdlogp_nojac(vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(fastdlogp_nojac)(
                RaveledVars(x, x0.point_map_info))

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available "
            "(e.g. `vars` contains discrete variables). MAP "
            "estimates may not be accurate for the default "
            "parameters. Defaulting to non-gradient minimization "
            "'Powell'.")
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                    dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(cost_func,
                              x0.data,
                              method=method,
                              jac=compute_gradient,
                              *args,
                              **kwargs)
        mx0 = opt_result["x"]
    except (KeyboardInterrupt, StopIteration) as e:
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print()

    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars,
                                include_transformed)
    mx = {
        var.name: value
        for var, value in zip(
            vars,
            model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
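For context, a minimal usage sketch of `find_MAP` (the toy model and data below are hypothetical, not taken from the examples above): define a small model, call `pm.find_MAP()` inside the model context, and read the MAP values from the returned dict.

import numpy as np
import pymc3 as pm

np.random.seed(0)
data = np.random.randn(100) + 1.0   # toy observations with mean near 1

with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sigma=10.0)
    sigma = pm.HalfNormal("sigma", sigma=1.0)
    pm.Normal("obs", mu=mu, sigma=sigma, observed=data)

    # L-BFGS-B is the default for all-continuous models; 'Powell' is chosen
    # automatically when discrete variables are present.
    map_estimate = pm.find_MAP(method="L-BFGS-B")

print(map_estimate["mu"])   # should be close to the sample mean of `data`

As the docstring stresses, this is meant for quick point estimates; use ``pymc3.sample()`` (which initializes NUTS itself) rather than `find_MAP` when choosing sampler starting values.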