Example 1
    def step(self, point):

        for name, shared_var in self.shared.items():
            shared_var.set_value(point[name])

        q = DictToArrayBijection.map(
            {v.name: point[v.name]
             for v in self.vars})

        step_res = self.astep(q)

        if self.generates_stats:
            apoint, stats = step_res
        else:
            apoint = step_res

        if not isinstance(apoint, RaveledVars):
            # We assume that the mapping has stayed the same
            apoint = RaveledVars(apoint, q.point_map_info)

        new_point = DictToArrayBijection.rmap(apoint, start_point=point)

        if self.generates_stats:
            return new_point, stats

        return new_point
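Note: the step method above uses DictToArrayBijection to shuttle between a name -> value dict and a single flat array. A minimal round-trip sketch, assuming the pymc.blocking API of PyMC >= 4 (the variable names are made up):

import numpy as np
from pymc.blocking import DictToArrayBijection

point = {"mu": np.array(0.5), "sigma_log__": np.array([1.0, 2.0])}
q = DictToArrayBijection.map(point)      # RaveledVars(data, point_map_info)
assert q.data.shape == (3,)              # all values raveled into one 1-D array
restored = DictToArrayBijection.rmap(q)  # back to a name -> array dict
assert set(restored) == set(point)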
Example 2
    def create_shared_params(self,
                             trace=None,
                             size=None,
                             jitter=1,
                             start=None):
        if trace is None:
            if size is None:
                raise opvi.ParametrizationError(
                    "Need `trace` or `size` to initialize")
            else:
                ipfn = make_initial_point_fn(
                    model=self.model,
                    overrides=start,
                    jitter_rvs={},
                    return_transformed=True,
                )
                start = ipfn(
                    self.model.rng_seeder.randint(2**30, dtype=np.int64))
                start = pm.floatX(DictToArrayBijection.map(start))
                # Initialize particles
                histogram = np.tile(start, (size, 1))
                histogram += pm.floatX(
                    np.random.normal(0, jitter, histogram.shape))

        else:
            histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
            i = 0
            for t in trace.chains:
                for j in range(len(trace)):
                    histogram[i] = DictToArrayBijection.map(trace.point(j, t))
                    i += 1
        return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
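Note: in the trace-is-None branch, particle initialization amounts to tiling one raveled start vector and jittering each copy. A standalone numpy sketch with hypothetical numbers:

import numpy as np

start = np.array([0.1, -0.2, 0.3])     # one raveled start point, ddim = 3
size, jitter = 4, 1.0
histogram = np.tile(start, (size, 1))  # (size, ddim): identical particles
histogram += np.random.normal(0, jitter, histogram.shape)  # spread them out
assert histogram.shape == (size, start.size)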
Example 3
    def step(self, point: PointType):

        partial_funcs_and_point = [
            DictToArrayBijection.mapf(x, start_point=point) for x in self.fs
        ]
        if self.allvars:
            partial_funcs_and_point.append(point)

        apoint = DictToArrayBijection.map(
            {v.name: point[v.name]
             for v in self.vars})
        step_res = self.astep(apoint, *partial_funcs_and_point)

        if self.generates_stats:
            apoint_new, stats = step_res
        else:
            apoint_new = step_res

        if not isinstance(apoint_new, RaveledVars):
            # We assume that the mapping has stayed the same
            apoint_new = RaveledVars(apoint_new, apoint.point_map_info)

        point_new = DictToArrayBijection.rmap(apoint_new, start_point=point)

        if self.generates_stats:
            return point_new, stats

        return point_new
Example 4
    def create_shared_params(self,
                             trace=None,
                             size=None,
                             jitter=1,
                             start=None):
        if trace is None:
            if size is None:
                raise opvi.ParametrizationError(
                    "Need `trace` or `size` to initialize")
            else:
                start = self._prepare_start(start)
                # Initialize particles
                histogram = np.tile(start, (size, 1))
                histogram += pm.floatX(
                    np.random.normal(0, jitter, histogram.shape))

        else:
            histogram = np.empty((len(trace) * len(trace.chains), self.ddim))
            i = 0
            for t in trace.chains:
                for j in range(len(trace)):
                    histogram[i] = DictToArrayBijection.map(
                        trace.point(j, t)).data
                    i += 1
        return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
Example 5
def test_leapfrog_reversible():
    n = 3
    np.random.seed(42)
    start, model, _ = models.non_normal(n)
    size = sum(start[v.name].size for v in model.value_vars)
    scaling = floatX(np.random.rand(size))

    class HMC(BaseHMC):
        def _hamiltonian_step(self, *args, **kwargs):
            pass

    step = HMC(vars=model.value_vars, model=model, scaling=scaling)

    step.integrator._logp_dlogp_func.set_extra_values({})
    astart = DictToArrayBijection.map(start)
    p = RaveledVars(floatX(step.potential.random()), astart.point_map_info)
    q = RaveledVars(floatX(np.random.randn(size)), astart.point_map_info)
    start = step.integrator.compute_state(p, q)
    for epsilon in [0.01, 0.1]:
        for n_steps in [1, 2, 3, 4, 20]:
            state = start
            for _ in range(n_steps):
                state = step.integrator.step(epsilon, state)
            for _ in range(n_steps):
                state = step.integrator.step(-epsilon, state)
            npt.assert_allclose(state.q.data, start.q.data, rtol=1e-5)
            npt.assert_allclose(state.p.data, start.p.data, rtol=1e-5)
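Note: the test works because the leapfrog integrator is exactly time-reversible: n steps with +epsilon followed by n steps with -epsilon retrace the trajectory up to floating-point roundoff. A self-contained sketch on a 1-D standard normal (an illustration, not PyMC's integrator):

import numpy as np

def leapfrog(q, p, eps, dlogp=lambda q: -q):  # logp(q) = -q**2 / 2
    p = p + 0.5 * eps * dlogp(q)  # half momentum step
    q = q + eps * p               # full position step
    p = p + 0.5 * eps * dlogp(q)  # half momentum step
    return q, p

q, p = 1.0, 0.5
for _ in range(20):
    q, p = leapfrog(q, p, 0.01)
for _ in range(20):
    q, p = leapfrog(q, p, -0.01)
np.testing.assert_allclose([q, p], [1.0, 0.5], rtol=1e-10)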
Example 6
    def astep(self,
              q0: RaveledVars) -> Tuple[RaveledVars, List[Dict[str, Any]]]:

        point_map_info = q0.point_map_info
        q0 = q0.data

        if not self.steps_until_tune and self.tune:
            if self.tune == "scaling":
                self.scaling = tune(self.scaling,
                                    self.accepted / float(self.tune_interval))
            elif self.tune == "lambda":
                self.lamb = tune(self.lamb,
                                 self.accepted / float(self.tune_interval))
            # Reset counter
            self.steps_until_tune = self.tune_interval
            self.accepted = 0

        epsilon = self.proposal_dist() * self.scaling

        # differential evolution proposal
        # select two other chains
        ir1, ir2 = np.random.choice(self.other_chains, 2, replace=False)
        r1 = DictToArrayBijection.map(self.population[ir1])
        r2 = DictToArrayBijection.map(self.population[ir2])
        # propose a jump
        q = floatX(q0 + self.lamb * (r1.data - r2.data) + epsilon)

        accept = self.delta_logp(q, q0)
        q_new, accepted = metrop_select(accept, q, q0)
        self.accepted += accepted

        self.steps_until_tune -= 1

        stats = {
            "tune": self.tune,
            "scaling": self.scaling,
            "lambda": self.lamb,
            "accept": np.exp(accept),
            "accepted": accepted,
        }

        q_new = RaveledVars(q_new, point_map_info)

        return q_new, [stats]
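Note: the proposal q = q0 + lamb * (r1 - r2) + epsilon is the standard differential-evolution jump: the difference of two other chains sets the direction and scale. A sketch with hypothetical values (2.38 / sqrt(2 * d) is the conventional DE-MCMC default for lambda):

import numpy as np

rng = np.random.default_rng(0)
q0 = np.zeros(2)                                     # current position
r1, r2 = np.array([1.0, 2.0]), np.array([0.5, 1.0])  # two other chains
lamb = 2.38 / np.sqrt(2 * q0.size)
epsilon = rng.normal(0, 1e-3, q0.size)               # small proposal noise
q = q0 + lamb * (r1 - r2) + epsilon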
Example 7
    def _initialize_kernel(self):
        """Create variables and logp function necessary to run kernel

        This method should not be overwritten. If needed, use `setup_kernel`
        instead.

        """
        # Create dictionary that stores original variables shape and size
        initial_point = self.model.recompute_initial_point(
            seed=self.rng.integers(2**30))
        for v in self.variables:
            self.var_info[v.name] = (initial_point[v.name].shape,
                                     initial_point[v.name].size)

        # Create particles bijection map
        if self.start:
            init_rnd = self.start
        else:
            init_rnd = self.initialize_population()

        population = []
        for i in range(self.draws):
            point = Point(
                {v.name: init_rnd[v.name][i]
                 for v in self.variables},
                model=self.model)
            population.append(DictToArrayBijection.map(point).data)
        self.tempered_posterior = np.array(floatX(population))

        # Initialize prior and likelihood log probabilities
        shared = make_shared_replacements(initial_point, self.variables,
                                          self.model)

        self.prior_logp_func = _logp_forw(initial_point, [self.model.varlogpt],
                                          self.variables, shared)
        self.likelihood_logp_func = _logp_forw(initial_point,
                                               [self.model.datalogpt],
                                               self.variables, shared)

        priors = [
            self.prior_logp_func(sample) for sample in self.tempered_posterior
        ]
        likelihoods = [
            self.likelihood_logp_func(sample)
            for sample in self.tempered_posterior
        ]

        self.prior_logp = np.array(priors).squeeze()
        self.likelihood_logp = np.array(likelihoods).squeeze()
Example 8
    def test_missing_data(self):
        # Originally from a case described in #3122
        X = np.random.binomial(1, 0.5, 10)
        X[0] = -1  # masked a single value
        X = np.ma.masked_values(X, value=-1)
        with pm.Model() as m:
            x1 = pm.Uniform("x1", 0.0, 1.0)
            x2 = pm.Bernoulli("x2", x1, observed=X)

        gf = m.logp_dlogp_function()
        gf._extra_are_set = True

        assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type

        pnt = m.test_point.copy()
        del pnt["x2_missing"]

        res = [gf(DictToArrayBijection.map(Point(pnt, model=m))) for i in range(5)]

        assert reduce(lambda x, y: np.array_equal(x, y) and y, res) is not False
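Note: the closing reduce folds pairwise equality over the results: it short-circuits to False as soon as two consecutive arrays differ and otherwise returns the last array. A standalone sketch of the idiom:

from functools import reduce

import numpy as np

res = [np.array([1.0, 2.0]) for _ in range(5)]  # five identical results
assert reduce(lambda x, y: np.array_equal(x, y) and y, res) is not False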
Example 9
    def create_shared_params(self, start=None):
        ipfn = make_initial_point_fn(
            model=self.model,
            overrides=start,
            jitter_rvs={},
            return_transformed=True,
        )
        start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
        if self.batched:
            start = start[self.group[0].name][0]
        else:
            start = DictToArrayBijection.map(start)
        rho = np.zeros((self.ddim,))
        if self.batched:
            start = np.tile(start, (self.bdim, 1))
            rho = np.tile(rho, (self.bdim, 1))
        return {
            "mu": aesara.shared(pm.floatX(start), "mu"),
            "rho": aesara.shared(pm.floatX(rho), "rho"),
        }
Example 10
def fixed_hessian(point, vars=None, model=None):
    """
    Returns a fixed Hessian for any chain location.

    Parameters
    ----------
    point: dict
    vars: list
        Variables for which the Hessian is to be calculated.
    model: Model (optional if in `with` context)
    """

    model = modelcontext(model)
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    point = Point(point, model=model)

    rval = np.ones(DictToArrayBijection.map(point).size) / 10
    return rval
Example 11
    def create_shared_params(self, start=None):
        ipfn = make_initial_point_fn(
            model=self.model,
            overrides=start,
            jitter_rvs={},
            return_transformed=True,
        )
        start = ipfn(self.model.rng_seeder.randint(2**30, dtype=np.int64))
        if self.batched:
            start = start[self.group[0].name][0]
        else:
            start = DictToArrayBijection.map(start)
        n = self.ddim
        L_tril = np.eye(n)[np.tril_indices(n)].astype(aesara.config.floatX)
        if self.batched:
            start = np.tile(start, (self.bdim, 1))
            L_tril = np.tile(L_tril, (self.bdim, 1))
        return {
            "mu": aesara.shared(start, "mu"),
            "L_tril": aesara.shared(L_tril, "L_tril"),
        }
Example 12
    def astep(self, q0):
        """Perform a single HMC iteration."""
        perf_start = time.perf_counter()
        process_start = time.process_time()

        p0 = self.potential.random()
        p0 = RaveledVars(p0, q0.point_map_info)

        start = self.integrator.compute_state(q0, p0)

        if not np.isfinite(start.energy):
            model = self._model
            check_test_point = model.point_logps()
            error_logp = check_test_point.loc[
                (np.abs(check_test_point) >= 1e20) | np.isnan(check_test_point)
            ]
            self.potential.raise_ok(q0.point_map_info)
            message_energy = (
                "Bad initial energy, check any log probabilities that "
                "are inf or -inf, nan or very small:\n{}".format(error_logp.to_string())
            )
            warning = SamplerWarning(
                WarningType.BAD_ENERGY,
                message_energy,
                "critical",
                self.iter_count,
            )
            self._warnings.append(warning)
            raise SamplingError("Bad initial energy")

        adapt_step = self.tune and self.adapt_step_size
        step_size = self.step_adapt.current(adapt_step)
        self.step_size = step_size

        if self._step_rand is not None:
            step_size = self._step_rand(step_size)

        hmc_step = self._hamiltonian_step(start, p0.data, step_size)

        perf_end = time.perf_counter()
        process_end = time.process_time()

        self.step_adapt.update(hmc_step.accept_stat, adapt_step)
        self.potential.update(hmc_step.end.q, hmc_step.end.q_grad, self.tune)
        if hmc_step.divergence_info:
            info = hmc_step.divergence_info
            point = None
            point_dest = None
            info_store = None
            if self.tune:
                kind = WarningType.TUNING_DIVERGENCE
            else:
                kind = WarningType.DIVERGENCE
                self._num_divs_sample += 1
                # We don't want to fill up all memory with divergence info
                if self._num_divs_sample < 100 and info.state is not None:
                    point = DictToArrayBijection.rmap(info.state.q)

                if self._num_divs_sample < 100 and info.state_div is not None:
                    point_dest = DictToArrayBijection.rmap(info.state_div.q)

                if self._num_divs_sample < 100:
                    info_store = info
            warning = SamplerWarning(
                kind,
                info.message,
                "debug",
                self.iter_count,
                info.exec_info,
                divergence_point_source=point,
                divergence_point_dest=point_dest,
                divergence_info=info_store,
            )

            self._warnings.append(warning)

        self.iter_count += 1
        if not self.tune:
            self._samples_after_tune += 1

        stats = {
            "tune": self.tune,
            "diverging": bool(hmc_step.divergence_info),
            "perf_counter_diff": perf_end - perf_start,
            "process_time_diff": process_end - process_start,
            "perf_counter_start": perf_start,
        }

        stats.update(hmc_step.stats)
        stats.update(self.step_adapt.stats())

        return hmc_step.end.q, [stats]
Example 13
    def __call__(self, q0: RaveledVars) -> RaveledVars:
        """Returns proposed sample given the current sample
        in dictionary form (q0_dict)."""

        # Logging is reduced to avoid extensive console output
        # during multiple recursive calls of subsample()
        _log = logging.getLogger("pymc")
        _log.setLevel(logging.ERROR)

        # Convert current sample from RaveledVars ->
        # dict before feeding to subsample.
        q0_dict = DictToArrayBijection.rmap(q0)

        with self.model_below:
            # Check if the tuning flag has been set to False
            # in which case tuning is stopped. The flag is set
            # to False (by MLDA's astep) when the burn-in
            # iterations of the highest-level MLDA sampler run out.
            # The change propagates to all levels.

            if self.tune:
                # Subsample in tuning mode
                trace = subsample(
                    draws=0,
                    step=self.step_method_below,
                    start=q0_dict,
                    tune=self.subsampling_rate,
                )
            else:
                # Subsample in normal mode without tuning
                # If DEMetropolisZMLDA is the base sampler a flag is raised to
                # make sure that history is edited after tuning ends
                if self.tuning_end_trigger:
                    if isinstance(self.step_method_below, DEMetropolisZMLDA):
                        self.step_method_below.tuning_end_trigger = True
                    self.tuning_end_trigger = False

                trace = subsample(
                    draws=self.subsampling_rate,
                    step=self.step_method_below,
                    start=q0_dict,
                    tune=0,
                )

        # set logging back to normal
        _log.setLevel(logging.NOTSET)

        # return sample with index self.subchain_selection from the generated
        # sequence of length self.subsampling_rate. The index is set within
        # MLDA's astep() function
        q_dict = trace.point(self.subchain_selection)

        # Make sure output dict is ordered the same way as the input dict.
        q_dict = Point(
            {key: q_dict[key]
             for key in q0_dict.keys()},
            model=self.model_below,
            filter_model_vars=True,
        )

        return DictToArrayBijection.map(q_dict)
Example 14
def find_MAP(start=None,
             vars=None,
             method="L-BFGS-B",
             return_raw=False,
             include_transformed=True,
             progressbar=True,
             maxeval=5000,
             model=None,
             *args,
             seed: Optional[int] = None,
             **kwargs):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc.sample()`` and it will automatically initialize NUTS in a better
    way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless
        discrete variables are specified in `vars`, then
        `Powell` which will perform better).  For instructions on use of a callable,
        refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional, defaults to True
        Flag for reporting automatically transformed variables in addition
        to the original variables.
    progressbar: bool, optional, defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler,
    but this is not an effective way of choosing starting values for sampling.
    NUTS initialization has since been greatly improved and is performed
    automatically inside ``pymc.sample()``, so `find_MAP` should be avoided
    for that purpose.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)
    ipfn = make_initial_point_fn(
        model=model,
        jitter_rvs={},
        return_transformed=True,
        overrides=start,
    )
    if seed is None:
        seed = model.rng_seeder.randint(2**30, dtype=np.int64)
    start = ipfn(seed)
    model.check_start_vals(start)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(
            RaveledVars(x, x0.point_map_info))

    try:
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
                RaveledVars(x, x0.point_map_info))

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available "
            "(e.g. vars contains discrete variables). MAP "
            "estimates may not be accurate for the default "
            "parameters. Defaulting to non-gradient minimization "
            "'Powell'.")
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                    dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(cost_func,
                              x0.data,
                              method=method,
                              jac=compute_gradient,
                              *args,
                              **kwargs)
        mx0 = opt_result["x"]  # r -> opt_result
    except (KeyboardInterrupt, StopIteration) as e:
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print(file=sys.stdout)

    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars,
                                include_transformed)
    mx = {
        var.name: value
        for var, value in zip(
            vars,
            model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
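A minimal usage sketch for find_MAP (the model itself is hypothetical; pm.Model, pm.Normal, and pm.find_MAP are the public PyMC API):

import numpy as np
import pymc as pm

with pm.Model():
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("obs", mu, 1.0, observed=np.array([0.1, -0.3, 0.2]))
    map_estimate = pm.find_MAP()  # e.g. {"mu": array(0.)}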
Example 15
def dlogp_func(x):
    return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
        RaveledVars(x, x0.point_map_info))
Example 16
    def __init__(
        self,
        draws: int,
        tune: int,
        step_method,
        step_method_pickled,
        chain: int,
        seed,
        start: Dict[str, np.ndarray],
        mp_ctx,
    ):
        self.chain = chain
        process_name = "worker_chain_%s" % chain
        self._msg_pipe, remote_conn = multiprocessing.Pipe()

        self._shared_point = {}
        self._point = {}

        for name, shape, dtype in DictToArrayBijection.map(start).point_map_info:
            size = 1
            for dim in shape:
                size *= int(dim)
            size *= dtype.itemsize
            if size != ctypes.c_size_t(size).value:
                raise ValueError("Variable %s is too large" % name)

            array = mp_ctx.RawArray("c", size)
            self._shared_point[name] = (array, shape, dtype)
            array_np = np.frombuffer(array, dtype).reshape(shape)
            array_np[...] = start[name]
            self._point[name] = array_np

        self._readable = True
        self._num_samples = 0

        if step_method_pickled is not None:
            step_method_send = step_method_pickled
        else:
            if mp_ctx.get_start_method() == "spawn":
                raise ValueError(
                    "please provide a pre-pickled step method when multiprocessing start method is 'spawn'"
                )
            step_method_send = step_method

        self._process = mp_ctx.Process(
            daemon=True,
            name=process_name,
            target=_run_process,
            args=(
                process_name,
                remote_conn,
                step_method_send,
                step_method_pickled is not None,
                self._shared_point,
                draws,
                tune,
                seed,
            ),
        )
        self._process.start()
        # Close the remote pipe, so that we get notified if the other
        # end is closed.
        remote_conn.close()
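Note: the RawArray pattern above gives each worker zero-copy access to the current point. A standalone sketch of the mechanism, independent of PyMC:

import multiprocessing

import numpy as np

ctx = multiprocessing.get_context("spawn")
n_bytes = 3 * np.dtype("float64").itemsize
buf = ctx.RawArray("c", n_bytes)                       # unsynchronized byte buffer
view = np.frombuffer(buf, dtype="float64").reshape(3)  # zero-copy ndarray view
view[...] = [1.0, 2.0, 3.0]  # writes land directly in the shared buffer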