Ejemplo n.º 1
0
    def step(self, point):
        """Advance the sampler by one step starting from ``point``.

        The shared variables are refreshed from ``point``, the sampled
        variables are packed with ``DictToArrayBijection.map``, ``astep`` is
        run on the packed value, and its result is unpacked again with
        ``DictToArrayBijection.rmap`` using ``point`` as the start point.

        Returns the new point, or ``(new_point, stats)`` when
        ``self.generates_stats`` is truthy.
        """
        for shared_name, shared_value in self.shared.items():
            shared_value.set_value(point[shared_name])

        sampled = {}
        for var in self.vars:
            sampled[var.name] = point[var.name]
        raveled_in = DictToArrayBijection.map(sampled)

        outcome = self.astep(raveled_in)
        with_stats = self.generates_stats

        if with_stats:
            raveled_out, stats = outcome
        else:
            raveled_out = outcome

        # A bare result is wrapped using the mapping of the input; this
        # assumes the mapping has not changed during ``astep``.
        if not isinstance(raveled_out, RaveledVars):
            raveled_out = RaveledVars(raveled_out, raveled_in.point_map_info)

        result = DictToArrayBijection.rmap(raveled_out, start_point=point)
        return (result, stats) if with_stats else result
Ejemplo n.º 2
0
def test_leapfrog_reversible_single():
    """Check that single leapfrog steps are reversible for every integrator.

    For each integrator, step size and step count, the state is advanced
    ``n_steps`` times, the momentum is negated, and the state is advanced
    ``n_steps`` times again.  The final position must match the initial
    position and the negated final momentum must match the initial momentum,
    within the tolerance returned by ``select_by_precision``.
    """
    n = 3
    start, model, _ = models.non_normal(n)

    integrators = ['leapfrog', 'two-stage', 'three-stage']
    steps = [
        BaseHMC(vars=model.vars, model=model, integrator=name, use_single_leapfrog=True)
        for name in integrators
    ]
    for method, step in zip(integrators, steps):
        q0 = DictToArrayBijection(step.ordering, start).map(start)
        p0 = floatX(np.ones(n) * .05)
        precision = select_by_precision(float64=1E-8, float32=1E-5)
        for epsilon in [0.01, 0.1, 1.2]:
            for n_steps in [1, 2, 3, 4, 20]:
                dlogp = step.dlogp(q0)
                q, p = q0, p0

                # The energy is evaluated here but its value is never checked.
                energy = step.compute_energy(q, p)

                # Leg 0 integrates from the start; leg 1 integrates again
                # after the momentum has been flipped.
                for leg in range(2):
                    if leg == 1:
                        p = -p
                    for _i in range(n_steps):
                        q, p, v, dlogp, _extra = step.leapfrog(
                            q, p, dlogp, floatX(np.array(epsilon)))

                label = (method, n_steps, epsilon)
                close_to(q, q0, precision, str(('q',) + label))
                close_to(-p, p0, precision, str(('p',) + label))
Ejemplo n.º 3
0
    def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
        """Build the shared ``histogram`` parameter.

        Parameters
        ----------
        trace
            When given, every point of every chain is mapped with
            ``DictToArrayBijection.map`` and stored as one histogram row.
        size
            Number of rows to create when no ``trace`` is given.
        jitter
            Scale passed to ``np.random.normal`` for the noise added to the
            tiled start vector (only used without ``trace``).
        start
            Optional start values; when omitted ``self.model.initial_point``
            is used.

        Returns
        -------
        dict
            ``{"histogram": <aesara shared variable>}``.

        Raises
        ------
        opvi.ParametrizationError
            If neither ``trace`` nor ``size`` is supplied.
        """
        if trace is not None:
            n_draws = len(trace)
            histogram = np.empty((n_draws * len(trace.chains), self.ddim))
            row = 0
            for chain in trace.chains:
                for draw in range(n_draws):
                    histogram[row] = DictToArrayBijection.map(trace.point(draw, chain))
                    row += 1
        elif size is None:
            raise opvi.ParametrizationError("Need `trace` or `size` to initialize")
        else:
            if start is not None:
                merged = self.model.initial_point.copy()
                self.model.update_start_vals(merged, start)
                start = merged
            else:
                start = self.model.initial_point
            # NOTE(review): the result of DictToArrayBijection.map is passed
            # directly to floatX / np.tile here — confirm it is array-like in
            # the targeted library version.
            start = pm.floatX(DictToArrayBijection.map(start))
            # Initialize particles: replicate the start vector, then perturb.
            histogram = np.tile(start, (size, 1))
            noise = np.random.normal(0, jitter, histogram.shape)
            histogram += pm.floatX(noise)

        return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram"))
Ejemplo n.º 4
0
def test_leapfrog_reversible_single():
    """Check that single leapfrog steps are reversible for every integrator.

    The state is advanced ``n_steps`` times, the momentum is negated, and the
    state is advanced ``n_steps`` times again; position and negated momentum
    must then agree with their initial values to within ``1e-8``.
    """
    n = 3
    start, model, _ = models.non_normal(n)

    integrators = ['leapfrog', 'two-stage', 'three-stage']
    steps = [
        BaseHMC(vars=model.vars, model=model, integrator=name, use_single_leapfrog=True)
        for name in integrators
    ]
    for method, step in zip(integrators, steps):
        q0 = DictToArrayBijection(step.ordering, start).map(start)
        p0 = np.ones(n) * .05
        for epsilon in [0.01, 0.1, 1.2]:
            for n_steps in [1, 2, 3, 4, 20]:
                dlogp = step.dlogp(q0)
                q, p = q0, p0

                # The energy is evaluated here but its value is never checked.
                energy = step.compute_energy(q, p)

                # First pass starts from (q0, p0); second pass starts from the
                # end state with the momentum flipped.
                for flip_momentum in (False, True):
                    if flip_momentum:
                        p = -p
                    for _i in range(n_steps):
                        q, p, v, dlogp, _extra = step.leapfrog(
                            q, p, dlogp, np.array(epsilon))

                label = (method, n_steps, epsilon)
                close_to(q, q0, 1e-8, str(('q',) + label))
                close_to(-p, p0, 1e-8, str(('p',) + label))
Ejemplo n.º 5
0
class PyMC3Potential:
    """Callable evaluating a model's negative log probability and its gradient.

    The constructor compiles a function for ``-model.logpt`` and its gradient
    with respect to the selected variables; calling the instance with a flat
    coordinate array returns ``(value, gradient_array)``.
    """

    def __init__(self, vars=None, model=None, point=None):
        """Set up the bijection and compile the function.

        Parameters
        ----------
        vars
            Variables to differentiate with respect to; defaults to
            ``model.cont_vars``.
        model
            Model to use; resolved through ``pm.modelcontext``.
        point
            Starting coordinates; defaults to ``model.test_point``.  A
            supplied point is passed through ``pm.util.update_start_vals``
            together with the test point.
        """
        self.model = pm.modelcontext(model)

        # Work out the full starting coordinates
        if point is not None:
            pm.util.update_start_vals(point, self.model.test_point, self.model)
        else:
            point = self.model.test_point

        # Fit all the continuous parameters by default
        self.vars = inputvars(self.model.cont_vars if vars is None else vars)
        allinmodel(self.vars, self.model)

        # Bijection between point dictionaries and flat arrays
        full_point = Point(point, model=self.model)
        self.bijection = DictToArrayBijection(ArrayOrdering(self.vars), full_point)

        # Pre-compile the negative log probability and its gradient
        neg_logp = -self.model.logpt
        gradients = theano.grad(neg_logp, self.vars, disconnected_inputs="ignore")
        self.func = get_theano_function_for_var(
            [neg_logp] + gradients, model=self.model)

    def __call__(self, coords):
        """Return ``(value, gradient)`` evaluated at the flat array ``coords``."""
        as_point = self.bijection.rmap(coords)
        outputs = self.func(
            *get_args_for_theano_function(as_point, model=self.model))
        # outputs[0] is the value; the remaining entries are the per-variable
        # gradients in the order of ``self.vars``.
        grad_by_name = {
            var.name: grad for var, grad in zip(self.vars, outputs[1:])
        }
        flat_grad = self.bijection.map(grad_by_name)
        return outputs[0], flat_grad
Ejemplo n.º 6
0
    def step(self, point: PointType):
        """Run one ``astep`` call on the array form of ``point``.

        Each function in ``self.fs`` is wrapped with
        ``DictToArrayBijection.mapf`` (using ``point`` as start point) and
        passed to ``astep`` after the packed sampled variables; ``point``
        itself is appended when ``self.allvars`` is truthy.

        Returns the new point, or ``(new_point, stats)`` when
        ``self.generates_stats`` is truthy.
        """
        extra_args = []
        for func in self.fs:
            extra_args.append(DictToArrayBijection.mapf(func, start_point=point))
        if self.allvars:
            extra_args.append(point)

        sampled = {}
        for var in self.vars:
            sampled[var.name] = point[var.name]
        raveled_in = DictToArrayBijection.map(sampled)

        outcome = self.astep(raveled_in, *extra_args)
        with_stats = self.generates_stats

        if with_stats:
            raveled_out, stats = outcome
        else:
            raveled_out = outcome

        # A bare result is wrapped using the mapping of the input; this
        # assumes the mapping has not changed during ``astep``.
        if not isinstance(raveled_out, RaveledVars):
            raveled_out = RaveledVars(raveled_out, raveled_in.point_map_info)

        result = DictToArrayBijection.rmap(raveled_out, start_point=point)
        return (result, stats) if with_stats else result
Ejemplo n.º 7
0
 def __init__(self, vars, out_vars, shared, blocked=True):
     """Store the sampled variables and build the dict/list bijections.

     ``shared`` is re-keyed by variable name.  ``self.population`` is read
     here, so it must already be available on the instance (it is not set
     in this method).
     """
     self.vars = vars
     self.ordering = ArrayOrdering(vars)
     self.lordering = utility.ListArrayOrdering(out_vars, intype='tensor')
     # Test values of the output variables, used as the list start point.
     test_values = [out_var.tag.test_value for out_var in out_vars]
     self.shared = {
         variable.name: shared_variable
         for variable, shared_variable in shared.items()
     }
     self.blocked = blocked
     self.bij = DictToArrayBijection(self.ordering, self.population[0])
     self.lij = utility.ListToArrayBijection(self.lordering, test_values)
Ejemplo n.º 8
0
    def step(self, point):
        """Update the shared variables from ``point`` and run one ``astep``.

        A new ``DictToArrayBijection`` is stored on ``self.bij`` for every
        call.  Returns the unpacked point, or ``(point, stats)`` when
        ``self.generates_stats`` is truthy.
        """
        for name, shared_var in self.shared.items():
            shared_var.set_value(point[name])

        self.bij = DictToArrayBijection(self.ordering, point)

        with_stats = self.generates_stats
        outcome = self.astep(self.bij.map(point))
        if with_stats:
            new_array, stats = outcome
            return self.bij.rmap(new_array), stats
        return self.bij.rmap(outcome)
Ejemplo n.º 9
0
def test_leapfrog_reversible():
    """Check that integrating forward then backward restores the state.

    For several step sizes and step counts the integrator is stepped
    ``n_steps`` times with ``epsilon`` and ``n_steps`` times with
    ``-epsilon``; position and momentum must then match the starting state
    to a relative tolerance of ``1e-5``.
    """
    n = 3
    np.random.seed(42)
    start, model, _ = models.non_normal(n)
    size = sum(start[value_var.name].size for value_var in model.value_vars)
    scaling = floatX(np.random.rand(size))

    class HMC(BaseHMC):
        # Stub: only the integrator of the step method is exercised.
        def _hamiltonian_step(self, *args, **kwargs):
            pass

    step = HMC(vars=model.value_vars, model=model, scaling=scaling)
    integrator = step.integrator

    integrator._logp_dlogp_func.set_extra_values({})
    map_info = DictToArrayBijection.map(start).point_map_info
    p = RaveledVars(floatX(step.potential.random()), map_info)
    q = RaveledVars(floatX(np.random.randn(size)), map_info)
    start = integrator.compute_state(p, q)

    for epsilon in [0.01, 0.1]:
        for n_steps in [1, 2, 3, 4, 20]:
            state = start
            # Forward with +epsilon, then back with -epsilon.
            for signed_epsilon in (epsilon, -epsilon):
                for _ in range(n_steps):
                    state = integrator.step(signed_epsilon, state)
            npt.assert_allclose(state.q.data, start.q.data, rtol=1e-5)
            npt.assert_allclose(state.p.data, start.p.data, rtol=1e-5)
Ejemplo n.º 10
0
    def initialize_population(self):
        """Create the initial population and record per-variable shape info.

        Draws come from ``sample_prior_predictive`` unless ``self.start`` is
        set, in which case ``self.start`` is used as the source of draws.
        The flattened draws are stored in ``self.posterior`` and a mapping
        ``name -> (shape, size)`` (taken from the model's initial point) is
        stored in ``self.var_info``.
        """
        if self.start is not None:
            init_rnd = self.start
        else:
            init_rnd = sample_prior_predictive(
                self.draws,
                var_names=[rv.name for rv in self.model.unobserved_RVs],
                model=self.model,
            )

        init = self.model.initial_point
        var_info = OrderedDict(
            (var.name, (init[var.name].shape, init[var.name].size))
            for var in self.variables
        )

        population = []
        for draw in range(self.draws):
            draw_values = {}
            for var in self.variables:
                draw_values[var.name] = init_rnd[var.name][draw]
            point = Point(draw_values, model=self.model)
            population.append(DictToArrayBijection.map(point).data)

        self.posterior = np.array(floatX(population))
        self.var_info = var_info
Ejemplo n.º 11
0
def test_leapfrog_reversible():
    """Check that a leapfrog trajectory is undone by flipping the momentum.

    After integrating ``n_steps`` steps, negating the momentum and
    integrating ``n_steps`` steps again, the position must match the start
    and the negated momentum must match the initial momentum, within the
    tolerance returned by ``select_by_precision``.
    """
    n = 3
    start, model, _ = models.non_normal(n)
    step = BaseHMC(vars=model.vars, model=model)
    q0 = DictToArrayBijection(step.ordering, start).map(start)
    p0 = floatX(np.ones(n) * .05)
    precision = select_by_precision(float64=1E-8, float32=1E-4)

    for epsilon in [.01, .1, 1.2]:
        for n_steps in [1, 2, 3, 4, 20]:
            label = str((n_steps, epsilon))

            q_mid, p_mid, _ = step.leapfrog(
                q0, p0,
                floatX(np.array(epsilon)), np.array(n_steps, dtype='int32'))
            q_end, p_end, _ = step.leapfrog(
                q_mid, -p_mid,
                floatX(np.array(epsilon)), np.array(n_steps, dtype='int32'))

            close_to(q_end, q0, precision, label)
            close_to(-p_end, p0, precision, label)
Ejemplo n.º 12
0
class ArrayStepSharedLLK(BlockedStep):
    """
    Variant of ArrayStepShared whose ``astep`` returns a larger point that
    also includes the likelihood values.
    Additionally takes a list of output variables including the likelihoods.

    Parameters
    ----------

    vars : list
        variables to be sampled
    out_vars : list
        variables to be stored in the traces
    shared : dict
        theano variable -> shared variables
    blocked : boolean
        (default True)
    """
    def __init__(self, vars, out_vars, shared, blocked=True):
        self.vars = vars
        self.ordering = ArrayOrdering(vars)
        self.lordering = ListArrayOrdering(out_vars, intype='tensor')
        # Test values of the output variables, used as the list start point.
        test_values = [out_var.tag.test_value for out_var in out_vars]
        self.shared = {
            variable.name: shared_variable
            for variable, shared_variable in shared.items()
        }
        self.blocked = blocked
        # ``self.population`` is not set in this method; it must already be
        # available on the instance.
        self.bij = DictToArrayBijection(self.ordering, self.population[0])

        # Output variables that are not among the sampled variables.
        sampled_names = {var.name for var in vars}
        blacklist = list(set(self.lordering.variables) - sampled_names)

        self.lij = ListToArrayBijection(
            self.lordering, test_values, blacklist=blacklist)

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__.update(state)

    def step(self, point):
        """Write ``point`` into the shared storage and run one ``astep``.

        Returns the unpacked new point together with the list returned by
        ``astep``.
        """
        for name, shared_var in self.shared.items():
            shared_var.container.storage[0] = point[name]

        new_array, out_list = self.astep(self.bij.map(point))
        new_point = self.bij.rmap(new_array)
        return new_point, out_list
Ejemplo n.º 13
0
def test_leapfrog_reversible():
    """Check that a leapfrog trajectory is undone by flipping the momentum.

    After integrating ``n_steps`` steps, negating the momentum and
    integrating ``n_steps`` steps again, the position must match the start
    and the negated momentum must match the initial momentum to ``1e-8``.
    """
    n = 3
    start, model, _ = models.non_normal(n)
    step = BaseHMC(vars=model.vars, model=model)
    q0 = DictToArrayBijection(step.ordering, start).map(start)
    p0 = np.ones(n) * .05

    for epsilon in [.01, .1, 1.2]:
        for n_steps in [1, 2, 3, 4, 20]:
            label = str((n_steps, epsilon))

            q_mid, p_mid, _ = step.leapfrog(
                q0, p0, np.array(epsilon), np.array(n_steps, dtype='int32'))
            q_end, p_end, _ = step.leapfrog(
                q_mid, -p_mid, np.array(epsilon), np.array(n_steps, dtype='int32'))

            close_to(q_end, q0, 1e-8, label)
            close_to(-p_end, p0, 1e-8, label)
Ejemplo n.º 14
0
    def __init__(self, model=None):
        """Resolve the model, record its start point and compile ``logpt``.

        ``model`` is resolved through ``pm.modelcontext``.  ``mean`` and
        ``cov`` start out as ``None``.
        """
        resolved = pm.modelcontext(model)
        self.model = resolved

        # Names of the variables reported to the user
        self.varnames = get_default_varnames(resolved.unobserved_RVs, False)

        # Starting point and its number of entries
        self.start = Point(resolved.test_point, model=resolved)
        self.ndim = len(self.start)
        self.mean = self.cov = None

        # Bijection over the continuous variables and the compiled log
        # probability function
        self.vars = inputvars(resolved.cont_vars)
        self.bij = DictToArrayBijection(ArrayOrdering(self.vars), self.start)
        self.func = get_theano_function_for_var(resolved.logpt, model=resolved)
Ejemplo n.º 15
0
class ModelWrapper:
    """Callable evaluating a model's negative log probability and its gradient.

    Calling the instance with a flat array returns ``(value, gradient)``.
    If the compiled function raises, the offending array and point are
    printed along with the traceback before the exception is re-raised.
    """

    def __init__(self, start=None, vars=None, model=None):
        """Set up the bijection and compile the function.

        Parameters
        ----------
        start
            Starting coordinates; defaults to ``model.test_point``.  A
            supplied value is passed through ``update_start_vals`` together
            with the test point.
        vars
            Variables to differentiate with respect to; defaults to
            ``model.cont_vars``.
        model
            Model to use; resolved through ``pm.modelcontext``.
        """
        self.model = pm.modelcontext(model)
        model = self.model

        # Work out the full starting coordinates
        if start is not None:
            update_start_vals(start, model.test_point, model)
        else:
            start = model.test_point
        self.start = start

        # Fit all the continuous parameters by default
        self.vars = inputvars(model.cont_vars if vars is None else vars)
        vars = self.vars
        allinmodel(vars, model)

        # Bijection between point dictionaries and flat arrays
        full_start = Point(start, model=model)
        self.bij = DictToArrayBijection(ArrayOrdering(vars), full_start)

        # Pre-compile the negative log probability and its gradient
        neg_logp = -model.logpt
        gradients = theano.grad(neg_logp, vars, disconnected_inputs="ignore")
        self.func = get_theano_function_for_var(
            [neg_logp] + gradients, model=model)

    def __call__(self, vec):
        """Return ``(value, gradient)`` evaluated at the flat array ``vec``."""
        try:
            as_point = self.bij.rmap(vec)
            outputs = self.func(
                *get_args_for_theano_function(as_point, model=self.model))
        except Exception:
            import traceback

            # Dump the inputs for debugging, then let the error propagate.
            print("array:", vec)
            print("point:", self.bij.rmap(vec))
            traceback.print_exc()
            raise

        grad_by_name = {
            var.name: grad for var, grad in zip(self.vars, outputs[1:])
        }
        flat_grad = self.bij.map(grad_by_name)
        return outputs[0], flat_grad
Ejemplo n.º 16
0
    def astep(self, q0: RaveledVars) -> Tuple[RaveledVars, List[Dict[str, Any]]]:
        """Propose and accept/reject one differential-evolution jump.

        The proposal adds ``lamb`` times the difference of two other chains'
        positions, plus a scaled draw from ``self.proposal_dist``, to the
        current position.  Returns the selected position wrapped in a
        ``RaveledVars`` and a one-element list holding the stats dict.
        """
        map_info = q0.point_map_info
        current = q0.data

        # Retune once the countdown has reached zero (only if tuning is on).
        if self.tune and not self.steps_until_tune:
            if self.tune == "scaling":
                self.scaling = tune(
                    self.scaling, self.accepted / float(self.tune_interval))
            elif self.tune == "lambda":
                self.lamb = tune(
                    self.lamb, self.accepted / float(self.tune_interval))
            # Reset counter
            self.steps_until_tune = self.tune_interval
            self.accepted = 0

        epsilon = self.proposal_dist() * self.scaling

        # Pick two distinct other chains and use their difference as the
        # jump direction.
        first_idx, second_idx = np.random.choice(self.other_chains, 2, replace=False)
        first = DictToArrayBijection.map(self.population[first_idx])
        second = DictToArrayBijection.map(self.population[second_idx])
        chain_difference = first.data - second.data
        proposal = floatX(current + self.lamb * chain_difference + epsilon)

        accept = self.delta_logp(proposal, current)
        selected, accepted = metrop_select(accept, proposal, current)
        self.accepted += accepted
        self.steps_until_tune -= 1

        stats = {
            "tune": self.tune,
            "scaling": self.scaling,
            "lambda": self.lamb,
            "accept": np.exp(accept),
            "accepted": accepted,
        }

        return RaveledVars(selected, map_info), [stats]
Ejemplo n.º 17
0
def fixed_hessian(point, vars=None, model=None):
    """
    Returns a fixed Hessian for any chain location.

    The result is a vector filled with ``0.1`` whose length equals the size
    of ``point`` mapped to an array over the selected variables.

    Parameters
    ----------
    model: Model (optional if in `with` context)
    point: dict
    vars: list
        Variables for which Hessian is to be calculated.
        Defaults to the model's continuous variables.
    """
    model = modelcontext(model)
    selected = inputvars(model.cont_vars if vars is None else vars)

    full_point = Point(point, model=model)
    bijection = DictToArrayBijection(ArrayOrdering(selected), full_point)

    n_entries = bijection.map(full_point).size
    return np.ones(n_entries) / 10
Ejemplo n.º 18
0
    def __init__(self, vars=None, model=None, point=None):
        """Set up the bijection and compile the value/gradient function.

        Parameters
        ----------
        vars
            Variables to differentiate with respect to; defaults to
            ``model.cont_vars``.
        model
            Model to use; resolved through ``pm.modelcontext``.
        point
            Starting coordinates; defaults to ``model.test_point``.  A
            supplied point is passed through ``pm.util.update_start_vals``
            together with the test point.
        """
        self.model = pm.modelcontext(model)

        # Work out the full starting coordinates
        if point is not None:
            pm.util.update_start_vals(point, self.model.test_point, self.model)
        else:
            point = self.model.test_point

        # Fit all the continuous parameters by default
        self.vars = inputvars(self.model.cont_vars if vars is None else vars)
        allinmodel(self.vars, self.model)

        # Bijection between point dictionaries and flat arrays
        full_point = Point(point, model=self.model)
        self.bijection = DictToArrayBijection(ArrayOrdering(self.vars), full_point)

        # Pre-compile the negative log probability and its gradient
        neg_logp = -self.model.logpt
        gradients = theano.grad(neg_logp, self.vars, disconnected_inputs="ignore")
        self.func = get_theano_function_for_var(
            [neg_logp] + gradients, model=self.model)
Ejemplo n.º 19
0
    def step(self, point):
        """Run one ``astep`` call on the array form of ``point``.

        Every function in ``self.fs`` is wrapped with the bijection's
        ``mapf`` and passed to ``astep`` after the packed point; ``point``
        itself is appended when ``self.allvars`` is truthy.  Returns the
        unpacked point, or ``(point, stats)`` when ``self.generates_stats``
        is truthy.
        """
        bijection = DictToArrayBijection(self.ordering, point)

        extra_args = []
        for func in self.fs:
            extra_args.append(bijection.mapf(func))
        if self.allvars:
            extra_args.append(point)

        with_stats = self.generates_stats
        outcome = self.astep(bijection.map(point), *extra_args)
        if with_stats:
            new_array, stats = outcome
            return bijection.rmap(new_array), stats
        return bijection.rmap(outcome)
Ejemplo n.º 20
0
class ArrayStepShared(BlockedStep):
    """Faster version of ArrayStep that requires the substep method that does not wrap
       the functions the step method uses.

    Works by setting shared variables before using the step. This eliminates the mapping
    and unmapping overhead as well as moving fewer variables around.
    """
    def __init__(self, vars, shared, blocked=True):
        """
        Parameters
        ----------
        vars: list of sampling variables
        shared: dict of aesara variable -> shared variable
        blocked: Boolean (default True)
        """
        self.vars = vars
        self.ordering = ArrayOrdering(vars)
        # Re-key the shared variables by the name of their aesara variable.
        self.shared = {
            get_var_name(variable): shared_variable
            for variable, shared_variable in shared.items()
        }
        self.blocked = blocked
        # The bijection is created on every call to ``step``.
        self.bij = None

    def step(self, point):
        """Update the shared variables from ``point`` and run one ``astep``.

        Returns the unpacked point, or ``(point, stats)`` when
        ``self.generates_stats`` is truthy.
        """
        for name, shared_var in self.shared.items():
            shared_var.set_value(point[name])

        self.bij = DictToArrayBijection(self.ordering, point)

        with_stats = self.generates_stats
        outcome = self.astep(self.bij.map(point))
        if with_stats:
            new_array, stats = outcome
            return self.bij.rmap(new_array), stats
        return self.bij.rmap(outcome)
Ejemplo n.º 21
0
 def create_shared_params(self, start=None):
     """Create the shared ``mu`` and ``rho`` parameters.

     ``mu`` is initialised from ``start`` (or the model's initial point when
     ``start`` is omitted) and ``rho`` from zeros of length ``self.ddim``.
     When ``self.batched`` is truthy both are tiled ``self.bdim`` times.
     """
     if start is not None:
         merged = start.copy()
         self.model.update_start_vals(merged, self.model.initial_point)
         start = merged
     else:
         start = self.model.initial_point

     batched = self.batched
     if batched:
         # Batched groups take the first entry of the group's first variable.
         mu = start[self.group[0].name][0]
     else:
         mu = DictToArrayBijection.map(start)
     rho = np.zeros((self.ddim, ))

     if self.batched:
         tiling = (self.bdim, 1)
         mu = np.tile(mu, tiling)
         rho = np.tile(rho, tiling)

     return {
         "mu": aesara.shared(pm.floatX(mu), "mu"),
         "rho": aesara.shared(pm.floatX(rho), "rho"),
     }
Ejemplo n.º 22
0
 def create_shared_params(self, start=None):
     """Create the shared ``mu`` and ``L_tril`` parameters.

     ``mu`` is initialised from ``start`` (or the model's initial point when
     ``start`` is omitted).  ``L_tril`` holds the lower-triangular entries of
     an identity matrix of size ``self.ddim``, cast to
     ``aesara.config.floatX``.  When ``self.batched`` is truthy both are
     tiled ``self.bdim`` times.
     """
     if start is not None:
         merged = start.copy()
         self.model.update_start_vals(merged, self.model.initial_point)
         start = merged
     else:
         start = self.model.initial_point

     if self.batched:
         # Batched groups take the first entry of the group's first variable.
         mu = start[self.group[0].name][0]
     else:
         mu = DictToArrayBijection.map(start)

     dim = self.ddim
     identity = np.eye(dim)
     L_tril = identity[np.tril_indices(dim)].astype(aesara.config.floatX)

     if self.batched:
         tiling = (self.bdim, 1)
         mu = np.tile(mu, tiling)
         L_tril = np.tile(L_tril, tiling)

     return {
         "mu": aesara.shared(mu, "mu"),
         "L_tril": aesara.shared(L_tril, "L_tril")
     }
Ejemplo n.º 23
0
    def test_missing_data(self):
        """Check repeated logp/dlogp evaluations with a masked observation.

        Originally from a case described in #3122.  The function is evaluated
        five times on the same point (with the missing-value entry removed)
        and all results must compare equal.
        """
        observed = np.random.binomial(1, 0.5, 10)
        observed[0] = -1  # sentinel for the single masked value
        observed = np.ma.masked_values(observed, value=-1)
        with pm.Model() as m:
            x1 = pm.Uniform("x1", 0.0, 1.0)
            x2 = pm.Bernoulli("x2", x1, observed=observed)

        gf = m.logp_dlogp_function()
        gf._extra_are_set = True

        assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type

        pnt = m.test_point.copy()
        del pnt["x2_missing"]

        res = []
        for _ in range(5):
            res.append(gf(DictToArrayBijection.map(Point(pnt, model=m))))

        # Fold pairwise: the accumulator becomes False as soon as two
        # consecutive results differ.
        assert reduce(lambda prev, cur: np.array_equal(prev, cur) and cur, res) is not False
Ejemplo n.º 24
0
    def astep(self, q0):
        """Perform a single HMC iteration.

        Parameters
        ----------
        q0
            Current position.  Its ``point_map_info`` is reused to wrap the
            freshly drawn momentum in a ``RaveledVars``.

        Returns
        -------
        tuple
            ``(hmc_step.end.q, [stats])`` where ``stats`` combines timing
            information, the divergence flag, ``hmc_step.stats`` and
            ``self.step_adapt.stats()``.

        Raises
        ------
        SamplingError
            If the energy of the initial state is not finite.
        """
        perf_start = time.perf_counter()
        process_start = time.process_time()

        p0 = self.potential.random()
        p0 = RaveledVars(p0, q0.point_map_info)

        start = self.integrator.compute_state(q0, p0)

        if not np.isfinite(start.energy):
            model = self._model
            check_test_point = model.point_logps()
            # Keep only the entries that are huge in magnitude or NaN.
            error_logp = check_test_point.loc[(
                np.abs(check_test_point) >= 1e20) | np.isnan(check_test_point)]
            self.potential.raise_ok(q0.point_map_info)
            message_energy = (
                "Bad initial energy, check any log probabilities that "
                "are inf or -inf, nan or very small:\n{}".format(
                    error_logp.to_string()))
            warning = SamplerWarning(
                WarningType.BAD_ENERGY,
                message_energy,
                "critical",
                self.iter_count,
            )
            self._warnings.append(warning)
            raise SamplingError("Bad initial energy")

        adapt_step = self.tune and self.adapt_step_size
        step_size = self.step_adapt.current(adapt_step)
        self.step_size = step_size

        if self._step_rand is not None:
            step_size = self._step_rand(step_size)

        hmc_step = self._hamiltonian_step(start, p0.data, step_size)

        perf_end = time.perf_counter()
        process_end = time.process_time()

        self.step_adapt.update(hmc_step.accept_stat, adapt_step)
        self.potential.update(hmc_step.end.q, hmc_step.end.q_grad, self.tune)
        if hmc_step.divergence_info:
            info = hmc_step.divergence_info
            point = None
            point_dest = None
            info_store = None
            if self.tune:
                kind = WarningType.TUNING_DIVERGENCE
            else:
                kind = WarningType.DIVERGENCE
                self._num_divs_sample += 1
                # We don't want to fill up all memory with divergence info
                if self._num_divs_sample < 100:
                    if info.state is not None:
                        point = DictToArrayBijection.rmap(info.state.q)
                    if info.state_div is not None:
                        # The diverging state is the destination; it must not
                        # overwrite the source point.
                        point_dest = DictToArrayBijection.rmap(info.state_div.q)
                    info_store = info
            warning = SamplerWarning(
                kind,
                info.message,
                "debug",
                self.iter_count,
                info.exec_info,
                divergence_point_source=point,
                divergence_point_dest=point_dest,
                divergence_info=info_store,
            )

            self._warnings.append(warning)

        self.iter_count += 1
        if not self.tune:
            self._samples_after_tune += 1

        stats = {
            "tune": self.tune,
            "diverging": bool(hmc_step.divergence_info),
            "perf_counter_diff": perf_end - perf_start,
            "process_time_diff": process_end - process_start,
            "perf_counter_start": perf_start,
        }

        stats.update(hmc_step.stats)
        stats.update(self.step_adapt.stats())

        return hmc_step.end.q, [stats]
Ejemplo n.º 25
0
class MCMCInterface:
    """
    An interface for using a ``pymc3`` model with a plain vanilla MCMC sampler.

    Args:
        model (optional): The ``pymc3`` model. If ``None`` (default), uses the
            current model on the stack.

    """

    def __init__(self, model=None):

        # Get the model
        self.model = pm.modelcontext(model)

        # Get the variables
        self.varnames = get_default_varnames(self.model.unobserved_RVs, False)

        # Get the starting point
        self.start = Point(self.model.test_point, model=self.model)
        self.ndim = len(self.start)
        self.mean = None
        self.cov = None

        # Compile the log probability function
        self.vars = inputvars(self.model.cont_vars)
        self.bij = DictToArrayBijection(ArrayOrdering(self.vars), self.start)
        self.func = get_theano_function_for_var(
            self.model.logpt, model=self.model
        )

    def optimize(self, **kwargs):
        """
        Maximize the log probability of a ``pymc3`` model.

        This routine wraps ``pymc3_ext.optimize``, which in turn
        wraps the ``scipy.optimize.minimize`` function. This method
        accepts any of the keywords accepted by either of those
        two functions.

        Returns:
            The array of parameter values at the optimum point.

        """
        self.map_soln, self.info = optimize(
            model=self.model, return_info=True, **kwargs
        )
        self.mean = self.info["x"]
        self.cov = self.info["hess_inv"]
        return self.mean

    def get_initial_state(
        self, nwalkers=30, var=None, check_finite=True, max_tries=100
    ):
        """
        Generate random initial points for sampling.

        If the ``optimize`` method was called beforehand, this method
        returns samples from a multidimensional Gaussian centered on
        the maximum a posteriori (MAP) solution with covariance equal
        to the inverse of the Hessian matrix at that point, unless
        ``var`` is provided, in which case that is used instead.
        If the optimizer was not called, this method
        returns samples from a Gaussian with mean equal to the
        model's test point (``model.test_point``) and variance equal to
        ``var``.

        Args:
            nwalkers (int, optional): Number of initial points to draw.
                Default is 30.
            var (float, array, or matrix, optional): Variance of the
                multidimensional Gaussian used to draw samples.
                This quantity is optional if ``optimize`` was called
                beforehand, otherwise it must be provided.
                Default is ``None``.
            check_finite (bool, optional): If ``True`` (default), redraw
                every point whose log probability is not finite.
            max_tries (int, optional): Upper bound on the number of
                redraws per point before giving up. Default is 100.

        Returns:
            An array of shape ``(nwalkers, ndim)`` where ``ndim``
            is the number of free model parameters.

        Raises:
            ValueError: If neither ``var`` nor a previous ``optimize``
                result is available, or if a point with finite log
                probability cannot be found within ``max_tries`` redraws.

        """
        if var is None:
            if self.mean is not None and self.cov is not None:
                # User ran `optimize`, so let's sample from
                # the Laplacian approximation at the MAP point
                mean = self.mean
                cov = self.cov
            else:
                raise ValueError(
                    "Please provide a variance `var`, or run `optimize` before calling this method."
                )
        else:
            if self.mean is not None:
                # User ran `optimize`, so let's sample around
                # the MAP point
                mean = self.mean
            else:
                # Sample around the test value
                mean = self.bij.map(self.start)
            cov = var * np.eye(len(mean))

        # Sample from the Gaussian
        p0 = np.random.multivariate_normal(mean, cov, size=nwalkers)

        # Ensure the log probability is finite everywhere
        if check_finite:
            for k in range(nwalkers):
                n = 0
                while not np.isfinite(self.logp(p0[k])):
                    if n > max_tries:
                        raise ValueError(
                            "Unable to initialize walkers at a point with finite `logp`. "
                            "Try reducing `var` or running `optimize()`."
                        )
                    p0[k] = np.random.multivariate_normal(mean, cov)
                    # Count the redraw so that `max_tries` actually bounds
                    # the loop.
                    n += 1

        return p0

    def logp(self, x):
        """
        Return the log probability evaluated at a point.

        If the compiled function raises, the array and the corresponding
        point are printed along with the traceback before the exception is
        re-raised.

        Args:
            x (array): The array of parameter values.

        Returns:
            The value of the log probability function evaluated at ``x``.
        """
        try:
            res = self.func(
                *get_args_for_theano_function(
                    self.bij.rmap(x), model=self.model
                )
            )
        except Exception:
            import traceback

            print("array:", x)
            print("point:", self.bij.rmap(x))
            traceback.print_exc()
            raise

        return res

    def transform(self, samples, varnames=None, progress=True):
        """
        Transform samples from the internal to the user parametrization.

        Args:
            samples (array or matrix): The set of points to transform.
            varnames (list, optional): The names of the parameters to
                transform to. These may either be strings or the actual
                ``pymc3`` model variables. If ``None`` (default), these
                are determined automatically and may be accessed as the
                ``varnames`` attribute of this class.
            progress (bool, optional): Display a progress bar? Default ``True``.

        Returns:
            An array of shape ``(..., len(varnames))``, where
            ``... = samples.shape[:-1]``, containing the transformed
            samples.
        """
        is_1d = len(np.shape(samples)) == 1
        samples = np.atleast_2d(samples)
        if varnames is None:
            varnames = self.varnames
        # Accept both plain names and objects carrying a ``name`` attribute.
        varnames = [v if isinstance(v, str) else v.name for v in varnames]
        shape = list(samples.shape)
        shape[-1] = len(varnames)
        x = np.zeros(shape)
        for k in tqdm(range(len(samples)), disable=not progress):
            point = pmx.optim.get_point(self, samples[k])
            for j, name in enumerate(varnames):
                x[k, j] = point[name]
        if is_1d:
            return x.flatten()
        else:
            return x
Ejemplo n.º 26
0
    def __init__(
        self,
        draws: int,
        tune: int,
        step_method,
        step_method_pickled,
        chain: int,
        seed,
        start,
        mp_ctx,
        pickle_backend,
    ):
        """Allocate shared buffers for ``start`` and launch the worker process.

        For every variable reported by ``DictToArrayBijection.map(start)`` a
        raw shared array is allocated through ``mp_ctx``, viewed as a numpy
        array and initialised from ``start``.  A process targeting
        ``_run_process`` is then started with one end of a new pipe.

        Raises
        ------
        ValueError
            If the byte size of a variable does not fit in ``size_t``.
        """
        self.chain = chain
        process_name = "worker_chain_%s" % chain
        self._msg_pipe, remote_conn = multiprocessing.Pipe()

        self._shared_point = {}
        self._point = {}

        map_info = DictToArrayBijection.map(start).point_map_info
        for name, shape, dtype in map_info:
            # Total number of bytes needed for this variable.
            n_items = 1
            for extent in shape:
                n_items *= int(extent)
            n_bytes = n_items * dtype.itemsize
            if ctypes.c_size_t(n_bytes).value != n_bytes:
                raise ValueError("Variable %s is too large" % name)

            raw = mp_ctx.RawArray("c", n_bytes)
            self._shared_point[name] = (raw, shape, dtype)
            view = np.frombuffer(raw, dtype).reshape(shape)
            view[...] = start[name]
            self._point[name] = view

        self._readable = True
        self._num_samples = 0

        # Send the pickled step method when one was supplied.
        is_pickled = step_method_pickled is not None
        step_method_send = step_method_pickled if is_pickled else step_method

        process_args = (
            process_name,
            remote_conn,
            step_method_send,
            step_method_pickled is not None,
            self._shared_point,
            draws,
            tune,
            seed,
            pickle_backend,
        )
        self._process = mp_ctx.Process(
            daemon=True,
            name=process_name,
            target=_run_process,
            args=process_args,
        )
        self._process.start()
        # Close the remote pipe, so that we get notified if the other
        # end is closed.
        remote_conn.close()
Ejemplo n.º 27
0
def find_MAP(start=None,
             vars=None,
             method="L-BFGS-B",
             return_raw=False,
             include_transformed=True,
             progressbar=True,
             maxeval=5000,
             model=None,
             *args,
             **kwargs):
    """
    Finds the local maximum a posteriori point given a model.

    find_MAP should not be used to initialize the NUTS sampler. Simply call pymc3.sample() and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.test_point`)
        The passed object is copied before being completed with the model's
        test point, so the caller's dictionary is left untouched.
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless
        discrete variables are specified in `vars`, then
        `Powell` which will perform better).  For instructions on use of a callable,
        refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Returns
    -------
    dict, or tuple of (dict, raw optimizer result) when `return_raw` is True.
        The dict maps variable names to their values at the optimum. The raw
        result is `None` when the optimization was interrupted.

    Notes
    -----
    Older code examples used find_MAP() to initialize the NUTS sampler,
    but this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside pymc3.sample() and you should thus avoid this method.
    """
    import copy

    model = modelcontext(model)
    if start is None:
        start = model.test_point
    else:
        # update_start_vals works on the object it is given (its return value
        # is not used), so operate on a private copy instead of mutating the
        # caller's dictionary.
        start = copy.deepcopy(start)
        update_start_vals(start, model.test_point, model)

    check_start_vals(start, model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)
    logp_func = bij.mapf(model.fastlogp_nojac)
    x0 = bij.map(start)

    try:
        dlogp_func = bij.mapf(model.fastdlogp_nojac(vars))
        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available." +
            "(E.g. vars contains discrete variables). MAP " +
            "estimates may not be accurate for the default " +
            "parameters. Defaulting to non-gradient minimization " +
            "'Powell'.")
        method = "Powell"

    if "fmin" in kwargs:
        # Legacy code path: the optimizer is passed as a callable via `fmin`.
        fmin = kwargs.pop("fmin")
        warnings.warn(
            "In future versions, set the optimization algorithm with a string. "
            'For example, use `method="L-BFGS-B"` instead of '
            '`fmin=sp.optimize.fmin_l_bfgs_b`.')

        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        fmin_args = getargspec(fmin).args
        # Only hand over a gradient when the optimizer accepts one AND the
        # gradient function could actually be built; otherwise `dlogp_func`
        # does not exist and calling it would raise a NameError.
        if compute_gradient and "fprime" in fmin_args:

            def grad_logp(point):
                return nan_to_num(-dlogp_func(point))

            opt_result = fmin(cost_func, x0, fprime=grad_logp, *args, **kwargs)
        elif "x0" in fmin_args:
            # The optimizer takes a starting value but no gradient.
            opt_result = fmin(cost_func, x0, *args, **kwargs)
        else:
            opt_result = fmin(cost_func, *args, **kwargs)

        # Some optimizers return a tuple whose first entry is the optimum.
        if isinstance(opt_result, tuple):
            mx0 = opt_result[0]
        else:
            mx0 = opt_result
    else:
        # remove 'if' part, keep just this 'else' block after version change
        if compute_gradient:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                        dlogp_func)
        else:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        try:
            opt_result = minimize(cost_func,
                                  x0,
                                  method=method,
                                  jac=compute_gradient,
                                  *args,
                                  **kwargs)
            mx0 = opt_result["x"]  # r -> opt_result
        except (KeyboardInterrupt, StopIteration) as e:
            # Interrupted: fall back to the last point the wrapper recorded.
            mx0, opt_result = cost_func.previous_x, None
            if isinstance(e, StopIteration):
                pm._log.info(e)
        finally:
            last_v = cost_func.n_eval
            if progressbar:
                assert isinstance(cost_func.progress, ProgressBar)
                cost_func.progress.total = last_v
                cost_func.progress.update(last_v)
                print()

    # Evaluate every reported variable at the optimum.
    vars = get_default_varnames(model.unobserved_RVs, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars,
                              model.fastfn(vars)(bij.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
Ejemplo n.º 28
0
    def astep(self, q0):
        """One MLDA step, given current sample q0.

        Parameters
        ----------
        q0
            Current sample; it is passed to ``DictToArrayBijection.rmap`` and
            its ``.data`` attribute is compared with the proposal's.

        Returns
        -------
        tuple
            ``(q_new, stats_list)`` where ``q_new`` is the sample selected by
            ``metrop_select`` and ``stats_list`` is a list whose first entry
            is this level's stats dict, followed by the base tuning stats
            collected from the step method below.
        """
        # Check if the tuning flag has been changed and if yes,
        # change the proposal's tuning flag and reset self.accepted
        # This is triggered by _iter_sample while the highest-level MLDA step
        # method is running. It then propagates to all levels.
        if self.proposal_dist.tune != self.tune:
            self.proposal_dist.tune = self.tune
            # set tune in sub-methods of compound stepper explicitly because
            # it is not set within sample.py (only the CompoundStep's tune flag is)
            if isinstance(self.step_method_below, CompoundStep):
                for method in self.step_method_below.methods:
                    method.tune = self.tune
            self.accepted = 0

        # Convert current sample from numpy array ->
        # dict before feeding to proposal
        q0_dict = DictToArrayBijection.rmap(q0)

        # Set subchain_selection (which sample from the coarse chain
        # is passed as a proposal to the fine chain). If variance
        # reduction is used, a random sample is selected as proposal.
        # If variance reduction is not used, the last sample is
        # selected as proposal.
        if self.variance_reduction:
            self.subchain_selection = np.random.randint(0, self.subsampling_rate)
        else:
            self.subchain_selection = self.subsampling_rate - 1
        self.proposal_dist.subchain_selection = self.subchain_selection

        # Call the recursive DA proposal to get proposed sample
        # and convert dict -> numpy array
        pre_q = self.proposal_dist(q0_dict)
        q = DictToArrayBijection.map(pre_q)

        # Evaluate MLDA acceptance log-ratio
        # If proposed sample from lower levels is the same as current one,
        # do not calculate likelihood, just set accept to 0.0
        if (q.data == q0.data).all():
            # Plain Python float: the `np.float` alias (which was just the
            # builtin `float`) was removed from NumPy in release 1.24.
            accept = 0.0
            skipped_logp = True
        else:
            accept = self.delta_logp(q.data, q0.data) + self.delta_logp_below(q0.data, q.data)
            skipped_logp = False

        # Accept/reject sample - next sample is stored in q_new
        q_new, accepted = metrop_select(accept, q, q0)
        if skipped_logp:
            # An unchanged proposal never counts as an acceptance.
            accepted = False

        # if sample is accepted, update self.Q_last with the sample's Q value
        # runs only for VR or when store_Q_fine is True
        if self.variance_reduction or self.store_Q_fine:
            if accepted and not skipped_logp:
                self.Q_last = self.model.Q.get_value()

        # Variance reduction
        if self.variance_reduction:
            self.update_vr_variables(accepted, skipped_logp)

        # Adaptive error model - runs only during tuning.
        if self.tune and self.adaptive_error_model:
            self.update_error_estimate(accepted, skipped_logp)

        # Update acceptance counter
        self.accepted += accepted

        stats = {"tune": self.tune, "accept": np.exp(accept), "accepted": accepted}

        # Save the VR statistics to the stats dictionary (only happens in the
        # top MLDA level)
        if (self.variance_reduction or self.store_Q_fine) and not self.is_child:
            q_stats = {}
            if self.variance_reduction:
                m = self
                for level in range(self.num_levels - 1, 0, -1):
                    # save the Q differences for this level and iteration
                    q_stats[f"Q_{level}_{level - 1}"] = np.array(m.Q_diff)
                    # this makes sure Q_diff is reset for
                    # the next iteration
                    m.Q_diff = []
                    if level == 1:
                        break
                    m = m.step_method_below
                q_stats["Q_0"] = np.array(m.Q_base_full)
                m.Q_base_full = []
            if self.store_Q_fine:
                q_stats["Q_" + str(self.num_levels - 1)] = np.array(self.Q_last)
            stats = {**stats, **q_stats}

        # Capture the base tuning stats from the level below.
        self.base_tuning_stats = []

        if isinstance(self.step_method_below, MLDA):
            self.base_tuning_stats = self.step_method_below.base_tuning_stats
        elif isinstance(self.step_method_below, MetropolisMLDA):
            self.base_tuning_stats.append({"base_scaling": self.step_method_below.scaling[0]})
        elif isinstance(self.step_method_below, DEMetropolisZMLDA):
            self.base_tuning_stats.append(
                {
                    "base_scaling": self.step_method_below.scaling[0],
                    "base_lambda": self.step_method_below.lamb,
                }
            )
        elif isinstance(self.step_method_below, CompoundStep):
            # Below method is CompoundStep
            for method in self.step_method_below.methods:
                if isinstance(method, MetropolisMLDA):
                    self.base_tuning_stats.append({"base_scaling": method.scaling[0]})
                elif isinstance(method, DEMetropolisZMLDA):
                    self.base_tuning_stats.append(
                        {"base_scaling": method.scaling[0], "base_lambda": method.lamb}
                    )

        return q_new, [stats] + self.base_tuning_stats
Ejemplo n.º 29
0
def find_MAP(start=None,
             vars=None,
             method="L-BFGS-B",
             return_raw=False,
             include_transformed=True,
             progressbar=True,
             maxeval=5000,
             model=None,
             *args,
             **kwargs):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc3.sample()`` and it will automatically initialize NUTS in a better
    way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless
        discrete variables are specified in `vars`, then
        `Powell` which will perform better).  For instructions on use of a callable,
        refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Returns
    -------
    dict, or tuple of (dict, raw optimizer result) when `return_raw` is True.
        The dict maps variable names to their values at the optimum. The raw
        result is `None` when the optimization was interrupted.

    Raises
    ------
    ValueError
        If `vars` is not given and the model has no continuous variables.

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler,
    but this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside ``pymc3.sample()`` and you should thus avoid this method.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)
    # Copy first so that completing the start values below does not touch
    # the caller's object (deepcopy of None is None).
    start = copy.deepcopy(start)
    if start is None:
        start = model.initial_point
    else:
        model.update_start_vals(start, model.initial_point)
    model.check_start_vals(start)

    start = Point(start, model=model)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    # Build the mapped log-probability function once instead of on every
    # evaluation requested by the optimizer.
    mapped_logp = DictToArrayBijection.mapf(model.fastlogp_nojac)

    def logp_func(x):
        return mapped_logp(RaveledVars(x, x0.point_map_info))

    try:
        # Construct the gradient function eagerly: this is the step that can
        # fail when no gradient is available, so it has to run inside the
        # `try` for the fallback to the gradient-free method to take effect.
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)
        mapped_dlogp = DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False
    else:

        def dlogp_func(x):
            return mapped_dlogp(RaveledVars(x, x0.point_map_info))

        compute_gradient = True

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available." +
            "(E.g. vars contains discrete variables). MAP " +
            "estimates may not be accurate for the default " +
            "parameters. Defaulting to non-gradient minimization " +
            "'Powell'.")
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                    dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(cost_func,
                              x0.data,
                              method=method,
                              jac=compute_gradient,
                              *args,
                              **kwargs)
        mx0 = opt_result["x"]  # r -> opt_result
    except (KeyboardInterrupt, StopIteration) as e:
        # Interrupted: fall back to the last point the wrapper recorded.
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print()

    # Re-attach the mapping info so the flat optimum can be turned back
    # into a point dictionary.
    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars,
                                include_transformed)
    mx = {
        var.name: value
        for var, value in zip(
            vars,
            model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
Ejemplo n.º 30
0
def optimize(start=None,
             vars=None,
             model=None,
             return_info=False,
             verbose=True,
             **kwargs):
    """Maximize the log prob of a PyMC3 model using scipy

    All extra arguments are passed directly to the ``scipy.optimize.minimize``
    function.

    Args:
        start: The PyMC3 coordinate dictionary of the starting position.
            It is copied before being completed with the model's test
            point, so the caller's dictionary is not modified.
        vars: The variables to optimize
        model: The PyMC3 model
        return_info: Return both the coordinate dictionary and the result of
            ``scipy.optimize.minimize``
        verbose: Print the success flag and log probability to the screen

    Returns:
        The coordinate dictionary at the accepted point, or a tuple of that
        dictionary and the ``scipy.optimize.minimize`` result when
        ``return_info`` is true. The starting point is returned when the
        optimizer's final objective is not finite or not better than the
        initial one.

    """
    import copy

    from scipy.optimize import minimize

    model = pm.modelcontext(model)

    # Work out the full starting coordinates
    if start is None:
        start = model.test_point
    else:
        # update_start_vals works on the object it is given (its return value
        # is not used), so operate on a private copy instead of mutating the
        # caller's dictionary.
        start = copy.deepcopy(start)
        update_start_vals(start, model.test_point, model)

    # Fit all the parameters by default
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    allinmodel(vars, model)

    # Work out the relevant bijection map
    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)

    # Pre-compile the theano model and gradient
    nlp = -model.logpt
    grad = theano.grad(nlp, vars, disconnected_inputs="ignore")
    func = get_theano_function_for_var([nlp] + grad, model=model)

    bar = None
    if verbose:
        names = [
            get_untransformed_name(v.name)
            if is_transformed_name(v.name) else v.name for v in vars
        ]
        sys.stderr.write("optimizing logp for variables: [{0}]\n".format(
            ", ".join(names)))
        bar = tqdm.tqdm()

    # This returns the objective function and its derivatives
    def objective(vec):
        res = func(*get_args_for_theano_function(bij.rmap(vec), model=model))
        d = dict(zip((v.name for v in vars), res[1:]))
        g = bij.map(d)
        if verbose:
            bar.set_postfix(logp="{0:e}".format(-res[0]))
            bar.update()
        return res[0], g

    try:
        # Optimize using scipy.optimize
        x0 = bij.map(start)
        initial = objective(x0)[0]
        kwargs["jac"] = True
        info = minimize(objective, x0, **kwargs)

        # Only accept the output if it is better than it was
        x = info.x if (np.isfinite(info.fun) and info.fun < initial) else x0

        # Coerce the output into the right format
        output_vars = get_default_varnames(model.unobserved_RVs, True)
        point = {
            var.name: value
            for var, value in zip(output_vars,
                                  model.fastfn(output_vars)(bij.rmap(x)))
        }
    finally:
        # Always release the progress bar, even when the objective or the
        # optimizer raises.
        if bar is not None:
            bar.close()

    if verbose:
        sys.stderr.write("message: {0}\n".format(info.message))
        sys.stderr.write("logp: {0} -> {1}\n".format(-initial, -info.fun))
        if not np.isfinite(info.fun):
            logger.warning("final logp not finite, returning initial point")
            logger.warning(
                "this suggests that something is wrong with the model")
            logger.debug("{0}".format(info))

    if return_info:
        return point, info
    return point
Ejemplo n.º 31
0
 def dlogp_func(x):
     """Evaluate the function returned by ``model.fastdlogp_nojac(vars)`` at ``x``.

     ``x`` is wrapped in a ``RaveledVars`` together with the mapping info of
     ``x0`` before being passed to the mapped function. ``model``, ``vars``
     and ``x0`` are taken from the enclosing scope, which is not shown here.
     """
     # NOTE(review): the mapped function is rebuilt on every call; presumably
     # it could be constructed once outside this closure -- confirm.
     return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
         RaveledVars(x, x0.point_map_info))