Example #1
    def initialize_population(self):
        """Create an initial population from the prior distribution."""
        population = []
        var_info = OrderedDict()
        if self.start is None:
            init_rnd = sample_prior_predictive(
                self.draws,
                var_names=[v.name for v in self.model.unobserved_RVs],
                model=self.model,
            )
        else:
            init_rnd = self.start

        init = self.model.initial_point

        for v in self.variables:
            var_info[v.name] = (init[v.name].shape, init[v.name].size)

        for i in range(self.draws):

            point = Point(
                {v.name: init_rnd[v.name][i]
                 for v in self.variables},
                model=self.model)
            population.append(DictToArrayBijection.map(point).data)

        self.posterior = np.array(floatX(population))
        self.var_info = var_info
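
Example #1 turns each prior draw into a flat array by building a `Point` and passing it through `DictToArrayBijection.map`. The standalone sketch below (plain NumPy, with made-up variable names and shapes) mirrors what that flattening step produces; it is an illustration, not the PyMC3 implementation.

import numpy as np
from collections import OrderedDict

# Hypothetical prior draw: one scalar and one length-3 vector
draw = OrderedDict(mu=np.array(0.3), beta=np.array([0.1, -0.2, 0.5]))

# Record (shape, size) per variable, as var_info does in the example
var_info = OrderedDict((name, (val.shape, val.size)) for name, val in draw.items())

# Ravel and concatenate into one flat vector -- conceptually what
# DictToArrayBijection.map(point).data yields for a Point
flat = np.concatenate([np.atleast_1d(val).ravel() for val in draw.values()])
print(var_info)  # OrderedDict([('mu', ((), 1)), ('beta', ((3,), 3))])
print(flat)      # [ 0.3  0.1 -0.2  0.5]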
Example #2
    def initialize_population(self):
        """Create an initial population from the prior distribution."""
        population = []
        var_info = OrderedDict()

        init_rnd = sample_prior_predictive(
            self.draws,
            var_names=[v.name for v in self.model.unobserved_RVs],
            model=self.model,
        )


        init = self.model.test_point

        for v in self.variables:
            var_info[v.name] = (init[v.name].shape, init[v.name].size)

        for i in range(self.draws):

            point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model)
            population.append(self.model.dict_to_array(point))

        self.nf_samples = np.array(floatX(population))
        self.live_points = np.array(floatX(population))
        self.var_info = var_info
        self.posterior = np.empty((0, np.shape(self.nf_samples)[1]))
Example #3
    def initialize_population(self):
        """Create an initial population from the prior distribution."""
        population = []

        if self.init_samples is None:
            init_rnd = sample_prior_predictive(
                self.N,
                var_names=[v.name for v in self.model.unobserved_RVs],
                model=self.model,
            )

            for i in range(self.N):

                point = Point(
                    {v.name: init_rnd[v.name][i]
                     for v in self.variables},
                    model=self.model)
                population.append(self.model.dict_to_array(point))
            self.prior_samples = np.array(floatX(population))

        else:
            self.prior_samples = np.copy(self.init_samples)

        self.samples = np.copy(self.prior_samples)
        self.nf_samples = np.copy(self.samples)
        self.get_posterior_logp()
        self.get_prior_logp()
        self.log_weight = self.posterior_logp - self.prior_logp
        self.log_evidence = logsumexp(self.log_weight) - np.log(
            len(self.log_weight))
        self.evidence = np.exp(self.log_evidence)
        self.log_weight = self.log_weight - self.log_evidence
        self.regularize_weights()

        #same as in fitnf but prior~q
        self.log_weight_pq_num = self.posterior_logp + 2 * self.prior_logp
        self.log_weight_pq_den = 3 * self.prior_logp
        self.log_evidence_pq = logsumexp(self.log_weight_pq_num) - logsumexp(
            self.log_weight_pq_den)
        self.evidence_pq = np.exp(self.log_evidence_pq)
        self.log_weight_pq = self.posterior_logp - self.prior_logp - self.log_evidence_pq
        self.pq_bw_loss = np.log(
            (np.exp(self.posterior_logp) -
             np.exp(self.log_evidence_pq +
                    self.prior_logp))**2)  #not actually used yet I think
        self.regularize_weights_pq()

        #sum of mean loss (p - q*Z_pq)^2 /N for diagnostic purposes
        self.log_mean_loss = np.log(
            np.mean((np.exp(self.posterior_logp) -
                     np.exp(self.prior_logp + self.log_evidence_pq))**2))

        self.init_weights_cleanup(lambda x: self.prior_logp(x),
                                  lambda x: self.prior_dlogp(x))
        self.q_ess = self.calculate_ess(self.log_weight)
        self.total_ess = self.calculate_ess(self.sinf_logw)

        self.all_logq = np.array([])
        self.nf_models = []
        self.nf_models_uw = []
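
Example #3 estimates the model evidence by importance sampling from the prior: log Z is approximated as logsumexp(posterior_logp - prior_logp) - log N, and the log-weights are then normalized by that estimate. The standalone sketch below reproduces that arithmetic with synthetic log-densities (the `prior_logp`/`posterior_logp` arrays are made up; in the example they come from `get_prior_logp()` and `get_posterior_logp()`), plus the Kish effective-sample-size formula as a plausible analogue of `calculate_ess`.

import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
N = 1000
# Synthetic per-draw log-densities standing in for the model evaluations
prior_logp = rng.normal(-3.0, 0.5, size=N)
posterior_logp = prior_logp + rng.normal(-1.0, 0.3, size=N)

log_weight = posterior_logp - prior_logp
log_evidence = logsumexp(log_weight) - np.log(N)   # importance-sampling estimate of log Z
log_weight = log_weight - log_evidence             # normalized log-weights

# Kish effective sample size of the weighted draws
w = np.exp(log_weight - logsumexp(log_weight))
ess = 1.0 / np.sum(w ** 2)
print(log_evidence, ess)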
Example #4
    def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
                 model=None, blocked=True, potential=None,
                 integrator="leapfrog", dtype=None, **theano_kwargs):
        """Set up Hamiltonian samplers with common structures.

        Parameters
        ----------
        vars : list of theano variables
        scaling : array_like, ndim = {1,2}
            Scaling for the momentum distribution. 1d arrays are interpreted
            as a matrix diagonal.
        step_scale : float, default=0.25
            Size of steps to take, automatically scaled down by 1/n**(1/4)
        is_cov : bool, default=False
            Treat scaling as a covariance matrix/vector if True, else treat
            it as a precision matrix/vector
        model : pymc3 Model instance
        blocked: bool, default=True
        potential : Potential, optional
            An object that represents the Hamiltonian with `velocity`,
            `energy`, and `random` methods.
        **theano_kwargs: passed to theano functions
        """
        model = modelcontext(model)

        if vars is None:
            vars = model.cont_vars
        vars = inputvars(vars)

        super(BaseHMC, self).__init__(vars, blocked=blocked, model=model,
                                      dtype=dtype, **theano_kwargs)

        size = self._logp_dlogp_func.size

        if scaling is None and potential is None:
            mean = floatX(np.zeros(size))
            var = floatX(np.ones(size))
            potential = QuadPotentialDiagAdapt(size, mean, var, 10)

        if isinstance(scaling, dict):
            point = Point(scaling, model=model)
            scaling = guess_scaling(point, model=model, vars=vars)

        if scaling is not None and potential is not None:
            raise ValueError("Can not specify both potential and scaling.")

        self.step_size = step_scale / (size ** 0.25)
        if potential is not None:
            self.potential = potential
        else:
            self.potential = quad_potential(scaling, is_cov)

        self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func)
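
Example #4 seeds the integrator step size as `step_scale / size ** 0.25`, so higher-dimensional models start with proportionally smaller steps. A quick standalone illustration of that scaling rule:

# Initial step-size rule used above: step_size = step_scale / n ** 0.25,
# where n is the size of the flattened parameter vector
step_scale = 0.25
for n in (1, 16, 256, 4096):
    print(n, step_scale / n ** 0.25)   # 0.25, 0.125, 0.0625, 0.03125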
Example #5
    def __call__(self, q0_dict: dict) -> dict:
        """Returns proposed sample given the current sample
        in dictionary form (q0_dict)."""

        # Logging is reduced to avoid extensive console output
        # during multiple recursive calls of subsample()
        _log = logging.getLogger("pymc3")
        _log.setLevel(logging.ERROR)

        with self.model_below:
            # Check if the tuning flag has been set to False
            # in which case tuning is stopped. The flag is set
            # to False (by MLDA's astep) when the burn-in
            # iterations of the highest-level MLDA sampler run out.
            # The change propagates to all levels.

            if self.tune:
                # Subsample in tuning mode
                self.trace = subsample(
                    draws=0,
                    step=self.step_method_below,
                    start=q0_dict,
                    trace=self.trace,
                    tune=self.subsampling_rate,
                )
            else:
                # Subsample in normal mode without tuning
                # If DEMetropolisZMLDA is the base sampler a flag is raised to
                # make sure that history is edited after tuning ends
                if self.tuning_end_trigger:
                    if isinstance(self.step_method_below, DEMetropolisZMLDA):
                        self.step_method_below.tuning_end_trigger = True
                    self.tuning_end_trigger = False

                self.trace = subsample(
                    draws=self.subsampling_rate,
                    step=self.step_method_below,
                    start=q0_dict,
                    trace=self.trace,
                )

        # set logging back to normal
        _log.setLevel(logging.NOTSET)

        # return sample with index self.subchain_selection from the generated
        # sequence of length self.subsampling_rate. The index is set within
        # MLDA's astep() function
        new_point = self.trace.point(-self.subsampling_rate + self.subchain_selection)
        new_point = Point(new_point, model=self.model_below, filter_model_vars=True)

        return new_point
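
Example #5 silences the "pymc3" logger for the duration of the nested `subsample` calls and restores it afterwards. A more defensive variant of the same idea is a context manager that restores the previous level even if an exception escapes; the sketch below is generic Python and not part of the MLDA code.

import logging
from contextlib import contextmanager

@contextmanager
def quiet_logger(name, level=logging.ERROR):
    """Temporarily raise a logger's level and restore it, even on error."""
    log = logging.getLogger(name)
    previous = log.level
    log.setLevel(level)
    try:
        yield log
    finally:
        log.setLevel(previous)

# Suppress chatter from a nested call for the duration of the block
with quiet_logger("pymc3"):
    logging.getLogger("pymc3").info("this message is suppressed")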
Example #6
def find_hessian_diag(point, vars=None, model=None):
    """
    Returns Hessian of logp at the point passed.

    Parameters
    ----------
    model: Model (optional if in `with` context)
    point: dict
    vars: list
        Variables for which Hessian is to be calculated.
    """
    model = modelcontext(model)
    H = model.fastfn(hessian_diag(model.logpt, vars))
    return H(Point(point, model=model))
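
`find_hessian_diag` compiles the diagonal of the Hessian of the model log-probability and evaluates it at the supplied point. As a rough, standalone analogue (not the PyMC3 code path), the same quantity can be approximated by central finite differences on an ordinary Python function:

import numpy as np

def hessian_diag_fd(logp, x, eps=1e-4):
    """Central finite-difference estimate of diag(d^2 logp / dx^2) at x."""
    x = np.asarray(x, dtype=float)
    diag = np.empty_like(x)
    f0 = logp(x)
    for i in range(x.size):
        step = np.zeros_like(x)
        step[i] = eps
        diag[i] = (logp(x + step) - 2.0 * f0 + logp(x - step)) / eps ** 2
    return diag

# A standard normal log-density has a Hessian diagonal of -1 everywhere
logp = lambda x: -0.5 * np.sum(x ** 2)
print(hessian_diag_fd(logp, np.array([0.3, -1.2])))   # approx [-1. -1.]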
Example #7
def _iter_sample(draws,
                 step,
                 start=None,
                 trace=None,
                 chain=0,
                 tune=None,
                 model=None,
                 random_seed=-1):
    """
    Modified from :func:`pymc3.sampling._iter_sample`

    tune: int
        adaptive step-size scaling is stopped after this chain sample
    """

    model = modelcontext(model)

    draws = int(draws)

    if draws < 1:
        raise ValueError('Argument `draws` should be above 0.')

    if start is None:
        start = {}

    if random_seed != -1:
        seed(random_seed)

    try:
        step = CompoundStep(step)
    except TypeError:
        pass

    point = Point(start, model=model)

    step.chain_index = chain

    trace.setup(draws, chain)
    for i in range(draws):
        if i == tune:
            step = stop_tuning(step)

        logger.debug('Step: Chain_%i step_%i' % (chain, i))
        point, out_list = step.step(point)

        trace.write(out_list, i)
        yield trace
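
`_iter_sample` is a generator: it yields the trace after every draw, so the caller decides how to report progress or stop early. The toy generator below (no sampler objects involved) shows the same consumption pattern:

# Toy generator with the same shape as _iter_sample: yield the trace after
# every draw so the caller controls progress reporting and early stopping.
def iter_numbers(draws):
    trace = []
    for i in range(draws):
        trace.append(i)          # stand-in for step.step(point) + trace.write
        yield trace

for trace in iter_numbers(5):
    if len(trace) >= 3:          # a caller may stop consuming early
        break
print(trace)                     # [0, 1, 2]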
Example #8
    def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
                 model=None, blocked=True, use_single_leapfrog=False, **theano_kwargs):
        """Superclass to implement Hamiltonian/hybrid monte carlo

        Parameters
        ----------
        vars : list of theano variables
        scaling : array_like, ndim = {1,2}
            Scaling for the momentum distribution. 1d arrays are interpreted as a matrix diagonal.
        step_scale : float, default=0.25
            Size of steps to take, automatically scaled down by 1/n**(1/4)
        is_cov : bool, default=False
            Treat scaling as a covariance matrix/vector if True, else treat it as a
            precision matrix/vector
        model : pymc3 Model instance.  default=Context model
        blocked: Boolean, default True
        use_single_leapfrog: Boolean, default=False
            If True, leapfrog steps take a single step at a time.
        **theano_kwargs: passed to theano functions
        """
        model = modelcontext(model)

        if vars is None:
            vars = model.cont_vars
        vars = inputvars(vars)

        if scaling is None:
            scaling = model.test_point

        if isinstance(scaling, dict):
            scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars)

        n = scaling.shape[0]
        self.step_size = step_scale / (n ** 0.25)
        self.potential = quad_potential(scaling, is_cov, as_cov=False)

        shared = make_shared_replacements(vars, model)
        if theano_kwargs is None:
            theano_kwargs = {}

        self.H, self.compute_energy, self.leapfrog, self._vars = get_theano_hamiltonian_functions(
            vars, shared, model.logpt, self.potential, use_single_leapfrog, **theano_kwargs)

        super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
Example #9
    def __init__(self, model=None):

        # Get the model
        self.model = pm.modelcontext(model)

        # Get the variables
        self.varnames = get_default_varnames(self.model.unobserved_RVs, False)

        # Get the starting point
        self.start = Point(self.model.test_point, model=self.model)
        self.ndim = len(self.start)
        self.mean = None
        self.cov = None

        # Compile the log probability function
        self.vars = inputvars(self.model.cont_vars)
        self.bij = DictToArrayBijection(ArrayOrdering(self.vars), self.start)
        self.func = get_theano_function_for_var(
            self.model.logpt, model=self.model
        )
Example #10
    def test_missing_data(self):
        # Originally from a case described in #3122
        X = np.random.binomial(1, 0.5, 10)
        X[0] = -1  # masked a single value
        X = np.ma.masked_values(X, value=-1)
        with pm.Model() as m:
            x1 = pm.Uniform("x1", 0.0, 1.0)
            x2 = pm.Bernoulli("x2", x1, observed=X)

        gf = m.logp_dlogp_function()
        gf._extra_are_set = True

        assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type

        pnt = m.test_point.copy()
        del pnt["x2_missing"]

        res = [gf(DictToArrayBijection.map(Point(pnt, model=m))) for i in range(5)]

        assert reduce(lambda x, y: np.array_equal(x, y) and y, res) is not False
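
The test in Example #10 relies on `numpy.ma.masked_values` to mark a missing observation; PyMC3 then introduces an extra free variable (`x2_missing`) for the masked entries. The masking step on its own looks like this (illustrative data):

import numpy as np

X = np.array([-1, 0, 1, 1, 0])          # -1 marks the missing entry
Xm = np.ma.masked_values(X, value=-1)   # mask every element equal to -1
print(Xm.mask)                          # [ True False False False False]
print(Xm.compressed())                  # unmasked data only: [0 1 1 0]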
Example #11
def fixed_hessian(point, vars=None, model=None):
    """
    Returns a fixed Hessian for any chain location.

    Parameters
    ----------
    model: Model (optional if in `with` context)
    point: dict
    vars: list
        Variables for which Hessian is to be calculated.
    """

    model = modelcontext(model)
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    point = Point(point, model=model)

    rval = np.ones(DictToArrayBijection.map(point).size) / 10
    return rval
Example #12
    def __init__(self, vars=None, model=None, point=None):
        self.model = pm.modelcontext(model)

        # Work out the full starting coordinates
        if point is None:
            point = self.model.test_point
        else:
            pm.util.update_start_vals(point, self.model.test_point, self.model)

        # Fit all the parameters by default
        if vars is None:
            vars = self.model.cont_vars
        self.vars = inputvars(vars)
        allinmodel(self.vars, self.model)

        # Work out the relevant bijection map
        point = Point(point, model=self.model)
        self.bijection = DictToArrayBijection(ArrayOrdering(self.vars), point)

        # Pre-compile the theano model and gradient
        nlp = -self.model.logpt
        grad = theano.grad(nlp, self.vars, disconnected_inputs="ignore")
        self.func = get_theano_function_for_var([nlp] + grad, model=self.model)
Example #13
    def __init__(self,
                 vars=None,
                 scaling=None,
                 step_scale=0.25,
                 is_cov=False,
                 model=None,
                 blocked=True,
                 potential=None,
                 dtype=None,
                 Emax=1000,
                 target_accept=0.8,
                 gamma=0.05,
                 k=0.75,
                 t0=10,
                 adapt_step_size=True,
                 step_rand=None,
                 **aesara_kwargs):
        """Set up Hamiltonian samplers with common structures.

        Parameters
        ----------
        vars: list of aesara variables
        scaling: array_like, ndim = {1,2}
            Scaling for the momentum distribution. 1d arrays are interpreted
            as a matrix diagonal.
        step_scale: float, default=0.25
            Size of steps to take, automatically scaled down by 1/n**(1/4)
        is_cov: bool, default=False
            Treat scaling as a covariance matrix/vector if True, else treat
            it as a precision matrix/vector
        model: pymc3 Model instance
        blocked: bool, default=True
        potential: Potential, optional
            An object that represents the Hamiltonian with `velocity`,
            `energy`, and `random` methods.
        **aesara_kwargs: passed to aesara functions
        """
        self._model = modelcontext(model)

        if vars is None:
            vars = self._model.cont_vars
        vars = inputvars(vars)

        super().__init__(vars,
                         blocked=blocked,
                         model=model,
                         dtype=dtype,
                         **aesara_kwargs)

        self.adapt_step_size = adapt_step_size
        self.Emax = Emax
        self.iter_count = 0
        size = self._logp_dlogp_func.size

        self.step_size = step_scale / (size**0.25)
        self.step_adapt = step_sizes.DualAverageAdaptation(
            self.step_size, target_accept, gamma, k, t0)
        self.target_accept = target_accept
        self.tune = True

        if scaling is None and potential is None:
            mean = floatX(np.zeros(size))
            var = floatX(np.ones(size))
            potential = QuadPotentialDiagAdapt(size, mean, var, 10)

        if isinstance(scaling, dict):
            point = Point(scaling, model=model)
            scaling = guess_scaling(point, model=model, vars=vars)

        if scaling is not None and potential is not None:
            raise ValueError("Can not specify both potential and scaling.")

        if potential is not None:
            self.potential = potential
        else:
            self.potential = quad_potential(scaling, is_cov)

        self.integrator = integration.CpuLeapfrogIntegrator(
            self.potential, self._logp_dlogp_func)

        self._step_rand = step_rand
        self._warnings = []
        self._samples_after_tune = 0
        self._num_divs_sample = 0
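
Example #13 additionally wires in `DualAverageAdaptation`, which tunes the step size toward `target_accept` while tuning is on. The sketch below is a rough rendering of the underlying Nesterov dual-averaging update from Hoffman & Gelman (2014), reusing the same parameter names (`gamma`, `k`, `t0`); the exact bookkeeping inside PyMC3 may differ.

import numpy as np

def dual_average(accept_stats, eps0, target=0.8, gamma=0.05, k=0.75, t0=10):
    """Return the adapted step size after a stream of acceptance statistics."""
    mu = np.log(10.0 * eps0)          # bias the search toward larger steps
    h_bar, log_eps_bar = 0.0, 0.0
    for m, alpha in enumerate(accept_stats, start=1):
        h_bar += ((target - alpha) - h_bar) / (m + t0)
        log_eps = mu - np.sqrt(m) / gamma * h_bar
        eta = m ** -k
        log_eps_bar = eta * log_eps + (1.0 - eta) * log_eps_bar
    return np.exp(log_eps_bar)

# Acceptance consistently below target -> the adapted step size shrinks
print(dual_average(accept_stats=[0.5] * 200, eps0=0.25))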
Example #14
def find_MAP(start=None,
             vars=None,
             method="L-BFGS-B",
             return_raw=False,
             include_transformed=True,
             progressbar=True,
             maxeval=5000,
             model=None,
             *args,
             **kwargs):
    """
    Finds the local maximum a posteriori point given a model.

    find_MAP should not be used to initialize the NUTS sampler. Simply call pymc3.sample() and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.test_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless
        discrete variables are specified in `vars`, then
        `Powell` which will perform better).  For instructions on use of a callable,
        refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used find_MAP() to initialize the NUTS sampler,
    but this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside pymc3.sample() and you should thus avoid this method.
    """
    model = modelcontext(model)
    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)

    check_start_vals(start, model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)
    logp_func = bij.mapf(model.fastlogp_nojac)
    x0 = bij.map(start)

    try:
        dlogp_func = bij.mapf(model.fastdlogp_nojac(vars))
        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available." +
            "(E.g. vars contains discrete variables). MAP " +
            "estimates may not be accurate for the default " +
            "parameters. Defaulting to non-gradient minimization " +
            "'Powell'.")
        method = "Powell"

    if "fmin" in kwargs:
        fmin = kwargs.pop("fmin")
        warnings.warn(
            "In future versions, set the optimization algorithm with a string. "
            'For example, use `method="L-BFGS-B"` instead of '
            '`fmin=sp.optimize.fmin_l_bfgs_b"`.')

        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        # Check to see if minimization function actually uses the gradient
        if "fprime" in getargspec(fmin).args:

            def grad_logp(point):
                return nan_to_num(-dlogp_func(point))

            opt_result = fmin(cost_func, x0, fprime=grad_logp, *args, **kwargs)
        else:
            # Check to see if minimization function uses a starting value
            if "x0" in getargspec(fmin).args:
                opt_result = fmin(cost_func, x0, *args, **kwargs)
            else:
                opt_result = fmin(cost_func, *args, **kwargs)

        if isinstance(opt_result, tuple):
            mx0 = opt_result[0]
        else:
            mx0 = opt_result
    else:
        # remove 'if' part, keep just this 'else' block after version change
        if compute_gradient:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                        dlogp_func)
        else:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        try:
            opt_result = minimize(cost_func,
                                  x0,
                                  method=method,
                                  jac=compute_gradient,
                                  *args,
                                  **kwargs)
            mx0 = opt_result["x"]  # r -> opt_result
        except (KeyboardInterrupt, StopIteration) as e:
            mx0, opt_result = cost_func.previous_x, None
            if isinstance(e, StopIteration):
                pm._log.info(e)
        finally:
            last_v = cost_func.n_eval
            if progressbar:
                assert isinstance(cost_func.progress, ProgressBar)
                cost_func.progress.total = last_v
                cost_func.progress.update(last_v)
                print()

    vars = get_default_varnames(model.unobserved_RVs, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars,
                              model.fastfn(vars)(bij.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
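
For context, a minimal user-level call of `find_MAP` looks like the following (standard PyMC3 usage; the model and data are illustrative):

import numpy as np
import pymc3 as pm

y = np.random.normal(1.0, 0.5, size=50)

with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sigma=10.0)
    sigma = pm.HalfNormal("sigma", sigma=1.0)
    pm.Normal("obs", mu=mu, sigma=sigma, observed=y)

    map_estimate = pm.find_MAP(method="L-BFGS-B")

print(map_estimate["mu"], map_estimate["sigma"])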
Example #15
    def __init__(self,
                 vars=None,
                 scaling=None,
                 step_scale=0.25,
                 is_cov=False,
                 model=None,
                 blocked=True,
                 use_single_leapfrog=False,
                 potential=None,
                 integrator="leapfrog",
                 **theano_kwargs):
        """Superclass to implement Hamiltonian/hybrid monte carlo

        Parameters
        ----------
        vars : list of theano variables
        scaling : array_like, ndim = {1,2}
            Scaling for the momentum distribution. 1d arrays are interpreted as a matrix diagonal.
        step_scale : float, default=0.25
            Size of steps to take, automatically scaled down by 1/n**(1/4)
        is_cov : bool, default=False
            Treat scaling as a covariance matrix/vector if True, else treat it as a
            precision matrix/vector
        model : pymc3 Model instance.  default=Context model
        blocked: Boolean, default True
        use_single_leapfrog: Boolean, default=False
            If True, leapfrog steps take a single step at a time.
        potential : Potential, optional
            An object that represents the Hamiltonian with `velocity`,
            `energy`, and `random` methods.
        **theano_kwargs: passed to theano functions
        """
        model = modelcontext(model)

        if vars is None:
            vars = model.cont_vars
        vars = inputvars(vars)

        if scaling is None and potential is None:
            size = sum(np.prod(var.dshape, dtype=int) for var in vars)
            mean = floatX(np.zeros(size))
            var = floatX(np.ones(size))
            potential = QuadPotentialDiagAdapt(size, mean, var, 10)

        if isinstance(scaling, dict):
            point = Point(scaling, model=model)
            scaling = guess_scaling(point, model=model, vars=vars)

        if scaling is not None and potential is not None:
            raise ValueError("Can not specify both potential and scaling.")

        self.step_size = step_scale / (model.ndim**0.25)
        if potential is not None:
            self.potential = potential
        else:
            self.potential = quad_potential(scaling, is_cov)

        shared = make_shared_replacements(vars, model)
        if theano_kwargs is None:
            theano_kwargs = {}

        self.H, self.compute_energy, self.compute_velocity, self.leapfrog, self.dlogp = get_theano_hamiltonian_functions(
            vars, shared, model.logpt, self.potential, use_single_leapfrog,
            integrator, **theano_kwargs)

        super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
Example #16
    def __init__(self, vars=None, out_vars=None, covariance=None, scale=1.,
                 n_chains=100, tune=True, tune_interval=100, model=None,
                 check_bound=True, likelihood_name='like', backend='csv',
                 proposal_name='MultivariateNormal', **kwargs):

        model = modelcontext(model)

        if vars is None:
            vars = model.vars

        vars = inputvars(vars)

        if out_vars is None:
            out_vars = model.unobserved_RVs

        out_varnames = [out_var.name for out_var in out_vars]

        self.scaling = utility.scalar2floatX(num.atleast_1d(scale))

        self.tune = tune
        self.check_bound = check_bound
        self.tune_interval = tune_interval
        self.steps_until_tune = tune_interval

        self.stage_sample = 0
        self.cumulative_samples = 0
        self.accepted = 0

        self.beta = 1.
        self.stage = 0
        self.chain_index = 0

        # needed to use the same parallel implementation function as for SMC
        self.resampling_indexes = num.arange(n_chains)
        self.n_chains = n_chains

        self.likelihood_name = likelihood_name
        self._llk_index = out_varnames.index(likelihood_name)
        self.backend = backend
        self.discrete = num.concatenate(
            [[v.dtype in discrete_types] * (v.dsize or 1) for v in vars])
        self.any_discrete = self.discrete.any()
        self.all_discrete = self.discrete.all()

        # create initial population
        self.population = []
        self.array_population = num.zeros(n_chains)
        logger.info('Creating initial population for {}'
                    ' chains ...'.format(self.n_chains))
        for i in range(self.n_chains):
            self.population.append(
                Point({v.name: v.random() for v in vars}, model=model))

        self.population[0] = model.test_point

        shared = make_shared_replacements(vars, model)
        self.logp_forw = logp_forw(out_vars, vars, shared)
        self.check_bnd = logp_forw([model.varlogpt], vars, shared)

        super(Metropolis, self).__init__(vars, out_vars, shared)

        # init proposal
        if covariance is None and proposal_name in multivariate_proposals:
            t0 = time()
            self.covariance = init_proposal_covariance(
                bij=self.bij, vars=vars, model=model, pop_size=1000)
            t1 = time()
            logger.info('Time for proposal covariance init: %f' % (t1 - t0))
            scale = self.covariance
        elif covariance is None:
            scale = num.ones(sum(v.dsize for v in vars))
        else:
            scale = covariance

        self.proposal_name = proposal_name
        self.proposal_dist = choose_proposal(
            self.proposal_name, scale=scale)
        self.proposal_samples_array = self.proposal_dist(n_chains)

        self.chain_previous_lpoint = [[]] * self.n_chains
        self._tps = None
Example #17
File: base.py Project: wangyf/beat
def _iter_sample(draws,
                 step,
                 start=None,
                 trace=None,
                 chain=0,
                 tune=None,
                 model=None,
                 random_seed=-1,
                 overwrite=True,
                 update_proposal=False,
                 keep_last=False):
    """
    Modified from :func:`pymc3.sampling._iter_sample`

    tune: int
        adaptive step-size scaling is stopped after this chain sample
    """

    model = modelcontext(model)

    draws = int(draws)

    if draws < 1:
        raise ValueError('Argument `draws` should be above 0.')

    if start is None:
        start = {}

    if random_seed != -1:
        seed(random_seed)

    try:
        step = CompoundStep(step)
    except TypeError:
        pass

    point = Point(start, model=model)

    step.chain_index = chain

    trace.setup(draws, chain, overwrite=overwrite)
    for i in range(draws):
        if i == tune:
            step = stop_tuning(step)

        logger.debug('Step: Chain_%i step_%i' % (chain, i))
        point, out_list = step.step(point)

        try:
            trace.buffer_write(out_list, step.cumulative_samples)
        except BufferError:  # buffer full
            last_sample = deepcopy(trace.buffer[-1])
            if update_proposal:  # only valid for PT for now
                if step.proposal_name in multivariate_proposals:
                    cov = trace.get_sample_covariance(step)
                    if cov is not None:
                        if not isinstance(trace, MemoryChain):
                            filename = '%s/proposal_cov_chain_%i_%i.%s' % (
                                trace.dir_path, trace.chain, trace.cov_counter,
                                'png')
                            from matplotlib import pyplot as plt
                            fig, axs = plt.subplots(1, 1)
                            im = axs.imshow(cov, aspect='auto')
                            plt.colorbar(im)
                            fig.savefig(filename, dpi=150)
                            plt.close(fig)

                        step.proposal_dist = choose_proposal(
                            step.proposal_name, scale=cov)

            trace.record_buffer()
            if keep_last:
                # put last sample back
                trace.buffer_write(*last_sample)

        yield trace
Example #18
def optimize(start=None,
             vars=None,
             model=None,
             return_info=False,
             verbose=True,
             **kwargs):
    """Maximize the log prob of a PyMC3 model using scipy

    All extra arguments are passed directly to the ``scipy.optimize.minimize``
    function.

    Args:
        start: The PyMC3 coordinate dictionary of the starting position
        vars: The variables to optimize
        model: The PyMC3 model
        return_info: Return both the coordinate dictionary and the result of
            ``scipy.optimize.minimize``
        verbose: Print the success flag and log probability to the screen

    """
    from scipy.optimize import minimize

    model = pm.modelcontext(model)

    # Work out the full starting coordinates
    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)

    # Fit all the parameters by default
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    allinmodel(vars, model)

    # Work out the relevant bijection map
    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)

    # Pre-compile the theano model and gradient
    nlp = -model.logpt
    grad = theano.grad(nlp, vars, disconnected_inputs="ignore")
    func = get_theano_function_for_var([nlp] + grad, model=model)

    if verbose:
        names = [
            get_untransformed_name(v.name)
            if is_transformed_name(v.name) else v.name for v in vars
        ]
        sys.stderr.write("optimizing logp for variables: [{0}]\n".format(
            ", ".join(names)))
        bar = tqdm.tqdm()

    # This returns the objective function and its derivatives
    def objective(vec):
        res = func(*get_args_for_theano_function(bij.rmap(vec), model=model))
        d = dict(zip((v.name for v in vars), res[1:]))
        g = bij.map(d)
        if verbose:
            bar.set_postfix(logp="{0:e}".format(-res[0]))
            bar.update()
        return res[0], g

    # Optimize using scipy.optimize
    x0 = bij.map(start)
    initial = objective(x0)[0]
    kwargs["jac"] = True
    info = minimize(objective, x0, **kwargs)

    # Only accept the output if it is better than it was
    x = info.x if (np.isfinite(info.fun) and info.fun < initial) else x0

    # Coerce the output into the right format
    vars = get_default_varnames(model.unobserved_RVs, True)
    point = {
        var.name: value
        for var, value in zip(vars,
                              model.fastfn(vars)(bij.rmap(x)))
    }

    if verbose:
        bar.close()
        sys.stderr.write("message: {0}\n".format(info.message))
        sys.stderr.write("logp: {0} -> {1}\n".format(-initial, -info.fun))
        if not np.isfinite(info.fun):
            logger.warning("final logp not finite, returning initial point")
            logger.warning(
                "this suggests that something is wrong with the model")
            logger.debug("{0}".format(info))

    if return_info:
        return point, info
    return point
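
The `objective` closure above returns the value and gradient together and passes `jac=True`, which is standard `scipy.optimize.minimize` usage. The same pattern on a toy quadratic:

import numpy as np
from scipy.optimize import minimize

def objective(x):
    """Return value and gradient in one call, as the wrapper above does."""
    value = np.sum((x - 3.0) ** 2)
    grad = 2.0 * (x - 3.0)
    return value, grad

x0 = np.zeros(4)
info = minimize(objective, x0, jac=True, method="L-BFGS-B")
print(info.x)   # approx [3. 3. 3. 3.]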
Example #19
def find_MAP(start=None,
             vars=None,
             method="L-BFGS-B",
             return_raw=False,
             include_transformed=True,
             progressbar=True,
             maxeval=5000,
             model=None,
             *args,
             **kwargs):
    """Finds the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc3.sample()`` and it will automatically initialize NUTS in a better
    way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.initial_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless
        discrete variables are specified in `vars`, then
        `Powell` which will perform better).  For instructions on use of a callable,
        refer to SciPy's documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler,
    but this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside ``pymc3.sample()`` and you should thus avoid this method.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)
    start = copy.deepcopy(start)
    if start is None:
        start = model.initial_point
    else:
        model.update_start_vals(start, model.initial_point)
    model.check_start_vals(start)

    start = Point(start, model=model)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars(
            x, x0.point_map_info))

    try:
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
                RaveledVars(x, x0.point_map_info))

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available." +
            "(E.g. vars contains discrete variables). MAP " +
            "estimates may not be accurate for the default " +
            "parameters. Defaulting to non-gradient minimization " +
            "'Powell'.")
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func,
                                    dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(cost_func,
                              x0.data,
                              method=method,
                              jac=compute_gradient,
                              *args,
                              **kwargs)
        mx0 = opt_result["x"]  # r -> opt_result
    except (KeyboardInterrupt, StopIteration) as e:
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print()

    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars,
                                include_transformed)
    mx = {
        var.name: value
        for var, value in zip(
            vars,
            model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx