Example #1
    def select_end_points(self, mtrace):
        """
        Read trace results and take end points for each chain and set as
        start population for the next stage.
        Parameters
        -------

        mtrace : Multitrace pymc3 object

        Returns
        -------
        population : List of pymc3.Point - objects,
        array_population : Ndarray of trace end-points
        likelihoods : Ndarray of likelihoods of the trace end-points
        """

        array_population = np.zeros((self.n_chains, self.ordering.dimensions))
        n_steps = len(mtrace)
        bij = pm.DictToArrayBijection(self.ordering, self.population[0])

        if self.stage > 0:
            # collect end points of each chain and put into array
            for var, slc, shp, _ in self.ordering.vmap:
                if len(shp) == 0:
                    array_population[:, slc] = np.atleast_2d(
                        mtrace.get_values(varname=var,
                                          burn=n_steps - 1,
                                          combine=True)).T
                else:
                    array_population[:, slc] = mtrace.get_values(
                        varname=var, burn=n_steps - 1, combine=True)
            # get likelihoods
            likelihoods = mtrace.get_values(varname=self.likelihood_name,
                                            burn=n_steps - 1,
                                            combine=True)
            population = []

            # map the end-point arrays back to dict points
            for i in range(self.n_chains):
                population.append(bij.rmap(array_population[i, :]))

        else:
            # in the initial stage there is only one trace, containing the
            # points of all chains
            likelihoods = mtrace.get_values(self.likelihood_name)
            for var, slc, shp, _ in self.ordering.vmap:
                if len(shp) == 0:
                    array_population[:, slc] = np.atleast_2d(
                        mtrace.get_values(varname=var)).T
                else:
                    array_population[:, slc] = mtrace.get_values(varname=var)

            population = []
            for i in range(self.n_chains):
                population.append(bij.rmap(array_population[i, :]))

        return population, array_population, likelihoods
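
For reference, a DictToArrayBijection pairs an ArrayOrdering with a template point and converts between dict points and flat arrays, which is what bij.rmap does above for each chain's end point. A minimal round-trip sketch, assuming a toy model with a single variable 'x' purely for illustration:

import numpy as np
import pymc3 as pm

with pm.Model() as model:
    x = pm.Normal('x', mu=0., sd=1., shape=3)

# build the bijection from the model's free variables and its test point
vars = pm.inputvars(model.cont_vars)
bij = pm.DictToArrayBijection(pm.ArrayOrdering(vars), model.test_point)

point = model.test_point          # dict: variable name -> value
flat = bij.map(point)             # flat ndarray in the given ordering
restored = bij.rmap(flat)         # back to a dict point
assert np.allclose(restored['x'], point['x'])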
Example #2
def _init_uw_global_shared(start, global_RVs, global_order):
    start = {v.name: start[v.name] for v in global_RVs}
    bij = pm.DictToArrayBijection(global_order, start)
    u_start = bij.map(start)
    w_start = np.zeros_like(u_start)
    uw_start = np.concatenate([u_start, w_start]).astype(floatX_str)
    uw_global_shared = theano.shared(uw_start, 'uw_global_shared')

    return uw_global_shared, bij
Example #3
def _init_uw_global_shared(start, global_RVs):
    global_order = pm.ArrayOrdering([v for v in global_RVs])
    start = {v.name: start[v.name] for v in global_RVs}
    bij = pm.DictToArrayBijection(global_order, start)
    u_start = bij.map(start)
    w_start = np.zeros_like(u_start)
    uw_start = floatX(np.concatenate([u_start, w_start]))
    uw_global_shared = theano.shared(uw_start, 'uw_global_shared')

    return uw_global_shared, bij
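
The two helpers above pack the variational means (u) and log standard deviations (w) into a single Theano shared vector. A self-contained sketch of the same pattern, assuming a toy model purely for illustration:

import numpy as np
import pymc3 as pm
import theano

with pm.Model() as model:
    x = pm.Normal('x', mu=0., sd=1., shape=2)

global_RVs = pm.inputvars(model.cont_vars)
global_order = pm.ArrayOrdering(global_RVs)
start = {v.name: model.test_point[v.name] for v in global_RVs}

bij = pm.DictToArrayBijection(global_order, start)
u_start = bij.map(start)                  # variational means
w_start = np.zeros_like(u_start)          # log standard deviations start at zero
uw = np.concatenate([u_start, w_start]).astype(theano.config.floatX)
uw_shared = theano.shared(uw, 'uw_shared')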
Example #4
    def __init__(self, model, observed):
        self.model = model
        self.observed = observed

        vars = pm.inputvars(model.cont_vars)

        bij = pm.DictToArrayBijection(pm.ArrayOrdering(vars), model.test_point)
        self.logp = bij.mapf(model.fastlogp)
        self.dlogp = bij.mapf(model.fastdlogp(vars))

        self.num_vars = len(vars)
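
The mapf calls above wrap functions that expect a dict point so they can be called with a flat array instead. A minimal sketch of the same idea, assuming a toy model purely for illustration:

import numpy as np
import pymc3 as pm

with pm.Model() as model:
    x = pm.Normal('x', mu=0., sd=1.)

vars = pm.inputvars(model.cont_vars)
bij = pm.DictToArrayBijection(pm.ArrayOrdering(vars), model.test_point)

logp_from_dict = model.fastlogp              # takes a dict point
logp_from_array = bij.mapf(model.fastlogp)   # takes a flat array

q0 = bij.map(model.test_point)
assert np.isclose(logp_from_array(q0), logp_from_dict(model.test_point))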
Example #5
    def __init__(self, model):
        """
        Parameters
        ----------
        model : pymc3.Model
            The probability model, written with Theano shared
            variables to form any observations. The Theano shared
            variables are set during inference.
        """
        self.model = model

        vars = pm.inputvars(model.cont_vars)
        self.n_vars = len(vars)

        bij = pm.DictToArrayBijection(pm.ArrayOrdering(vars), model.test_point)
        self.logp = bij.mapf(model.fastlogp)
        self.dlogp = bij.mapf(model.fastdlogp(vars))
Example #6
    def __init__(self, model):
        """
        Parameters
        ----------
        model : pymc3.Model
            The probability model, written with Theano shared
            variables to form any observations and with
            `transform=None` for any latent variables. The Theano
            shared variables are set during inference, and all latent
            variables live on their original (constrained) space.
        """
        self.model = model
        self.n_vars = None

        vars = pm.inputvars(model.cont_vars)
        bij = pm.DictToArrayBijection(pm.ArrayOrdering(vars), model.test_point)
        self.logp = bij.mapf(model.fastlogp)
        self.dlogp = bij.mapf(model.fastdlogp(vars))
Example #7
def test_leapfrog_reversible():
    n = 3
    start, model, _ = models.non_normal(n)

    with model:
        h = pm.find_hessian(start, model=model)
        step = pm.HamiltonianMC(model.vars, h, model=model)

    bij = pm.DictToArrayBijection(step.ordering, start)

    logp, dlogp = list(map(bij.mapf, step.fs))
    H = Hamiltonian(logp, dlogp, step.potential)

    q0 = bij.map(start)
    p0 = np.ones(n) * .05
    for e in [.01, .1, 1.2]:
        for L in [1, 2, 3, 4, 20]:

            q, p = q0, p0
            q, p = leapfrog(H, q, p, L, e)
            q, p = leapfrog(H, q, -p, L, e)

            close_to(q, q0, 1e-8, str((L, e)))
            close_to(-p, p0, 1e-8, str((L, e)))
Example #8
def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False,
         optimizer=None, learning_rate=.001, epsilon=.1, random_seed=None):
    """Perform automatic differentiation variational inference (ADVI).

    This function implements mean-field ADVI, where the variational
    posterior distribution is assumed to be a spherical Gaussian without
    correlation between parameters and is fit to the true posterior
    distribution. The means and standard deviations of the variational
    posterior are referred to as the variational parameters.

    The return value of this function is an :code:`ADVIfit` object, which has
    variational parameters. If you want to draw samples from the variational
    posterior, you need to pass the :code:`ADVIfit` object to
    :code:`pymc3.variational.sample_vp()`.

    The variational parameters are defined on the transformed space, which is
    required to do ADVI on an unconstrained parameter space as described in
    [KTR+2016]. The parameters in the :code:`ADVIfit` object are in the
    transformed space, while traces returned by :code:`sample_vp()` are in
    the original space as obtained by MCMC sampling methods in PyMC3.

    The variational parameters are optimized with the given optimizer, which is
    a function that returns a dictionary of parameter updates as provided to a
    Theano function. If no optimizer is provided, optimization is performed
    with a modified version of Adagrad, where only the last n_window gradient
    vectors are used to control the learning rate and older gradient vectors
    are ignored. n_window denotes the size of the time window and is fixed
    to 10.

    Parameters
    ----------
    vars : object
        Random variables.
    start : Dict or None
        Initial values of parameters (variational means).
    model : Model
        Probabilistic model.
    n : int
        Number of iterations updating parameters.
    accurate_elbo : bool
        If true, 100 MC samples are used for accurate calculation of ELBO.
    optimizer : (loss, tensor) -> dict or OrderedDict
        A function that returns parameter updates given loss and parameter
        tensor. If :code:`None` (default), a default Adagrad optimizer is
        used with parameters :code:`learning_rate` and :code:`epsilon` below.
    learning_rate : float
        Base learning rate for adagrad. This parameter is ignored when
        optimizer is given.
    epsilon : float
        Offset in denominator of the scale of learning rate in Adagrad.
        This parameter is ignored when optimizer is given.
    random_seed : int or None
        Seed to initialize random state. None uses current seed.

    Returns
    -------
    ADVIFit
        Named tuple, which includes 'means', 'stds', and 'elbo_vals'.
        'means' is the mean, 'stds' is the standard deviation, and
        'elbo_vals' is the trace of ELBO values during optimization.

    References
    ----------
    .. [KTR+2016] Kucukelbir, A., Tran, D., Ranganath, R., Gelman, A.,
        and Blei, D. M. (2016). Automatic Differentiation Variational
        Inference. arXiv preprint arXiv:1603.00788.
    """
    model = pm.modelcontext(model)
    if start is None:
        start = model.test_point

    if vars is None:
        vars = model.vars
    vars = pm.inputvars(vars)

    if not pm.model.all_continuous(vars):
        raise ValueError('Model should not include discrete RVs for ADVI.')

    n_mcsamples = 100 if accurate_elbo else 1

    # Prepare optimizer
    if optimizer is None:
        optimizer = adagrad_optimizer(learning_rate, epsilon)

    # Create variational gradient tensor
    elbo, shared = _calc_elbo(vars, model, n_mcsamples=n_mcsamples,
                              random_seed=random_seed)

    # Set starting values
    for var, share in shared.items():
        share.set_value(start[str(var)])

    order = pm.ArrayOrdering(vars)
    bij = pm.DictToArrayBijection(order, start)
    u_start = bij.map(start)
    w_start = np.zeros_like(u_start)
    uw = np.concatenate([u_start, w_start])

    # Create parameter update function used in the training loop
    uw_shared = theano.shared(uw, 'uw_shared')
    elbo = pm.CallableTensor(elbo)(uw_shared)
    updates = optimizer(loss=-1 * elbo, param=[uw_shared])
    f = theano.function([], [uw_shared, elbo], updates=updates)

    # Optimization loop
    elbos = np.empty(n)
    try:
        progress = trange(n)
        for i in progress:
            uw_i, e = f()
            elbos[i] = e
            if i % (n // 10) == 0 and i > 0:
                avg_elbo = elbos[i - n // 10:i].mean()
                progress.set_description('Average ELBO = {:,.5g}'.format(avg_elbo))
    except KeyboardInterrupt:
        elbos = elbos[:i]
        avg_elbo = elbos[i - n // 10:].mean()
        pm._log.info('Interrupted at {:,d} [{:.0f}%]: Average ELBO = {:,.5g}'.format(
            i, 100 * i // n, avg_elbo))
    else:
        avg_elbo = elbos[-n // 10:].mean()
        pm._log.info('Finished [100%]: Average ELBO = {:,.5g}'.format(avg_elbo))

    # Estimated parameters
    l = int(uw_i.size / 2)
    u = bij.rmap(uw_i[:l])
    w = bij.rmap(uw_i[l:])
    # w is in log space
    for var in w.keys():
        w[var] = np.exp(w[var])

    return ADVIFit(u, w, elbos)
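
A hedged usage sketch of the workflow the docstring above describes, fitting with advi and then drawing from the variational posterior with sample_vp; the toy model, data, and draw counts are assumptions for illustration:

import pymc3 as pm

with pm.Model() as model:
    mu = pm.Normal('mu', mu=0., sd=10.)
    pm.Normal('y', mu=mu, sd=1., observed=[0.1, -0.3, 0.2])

    v_params = pm.variational.advi(n=5000)    # ADVIFit(means, stds, elbo_vals)
    trace = pm.variational.sample_vp(v_params, draws=1000)

print(v_params.means['mu'], v_params.stds['mu'])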
Example #9
def advi(vars=None,
         start=None,
         model=None,
         n=5000,
         accurate_elbo=False,
         optimizer=None,
         learning_rate=.001,
         epsilon=.1,
         mode=None,
         tol_obj=0.01,
         eval_elbo=100,
         random_seed=None,
         progressbar=True):
    """Perform automatic differentiation variational inference (ADVI).

    This function implements mean-field ADVI, where the variational
    posterior distribution is assumed to be a spherical Gaussian without
    correlation between parameters and is fit to the true posterior
    distribution. The means and standard deviations of the variational
    posterior are referred to as the variational parameters.

    The return value of this function is an :code:`ADVIfit` object, which has
    variational parameters. If you want to draw samples from the variational
    posterior, you need to pass the :code:`ADVIfit` object to
    :code:`pymc3.variational.sample_vp()`.

    The variational parameters are defined on the transformed space, which is
    required to do ADVI on an unconstrained parameter space as described in
    [KTR+2016]. The parameters in the :code:`ADVIfit` object are in the
    transformed space, while traces returned by :code:`sample_vp()` are in
    the original space as obtained by MCMC sampling methods in PyMC3.

    The variational parameters are optimized with the given optimizer, which is
    a function that returns a dictionary of parameter updates as provided to a
    Theano function. If no optimizer is provided, optimization is performed
    with a modified version of Adagrad, where only the last n_window gradient
    vectors are used to control the learning rate and older gradient vectors
    are ignored. n_window denotes the size of the time window and is fixed
    to 10.

    Parameters
    ----------
    vars : object
        Random variables.
    start : Dict or None
        Initial values of parameters (variational means).
    model : Model
        Probabilistic model.
    n : int
        Number of iterations updating parameters.
    accurate_elbo : bool
        If true, 100 MC samples are used for accurate calculation of ELBO.
    optimizer : (loss, tensor) -> dict or OrderedDict
        A function that returns parameter updates given loss and parameter
        tensor. If :code:`None` (default), a default Adagrad optimizer is
        used with parameters :code:`learning_rate` and :code:`epsilon` below.
    learning_rate : float
        Base learning rate for adagrad. This parameter is ignored when
        optimizer is given.
    epsilon : float
        Offset in denominator of the scale of learning rate in Adagrad.
        This parameter is ignored when optimizer is given.
    tol_obj : float
        Relative tolerance for testing convergence of ELBO.
    eval_elbo : int
        Window for checking convergence of ELBO. Convergence is checked
        at every multiple of eval_elbo iterations.
    random_seed : int or None
        Seed to initialize random state. None uses current seed.
    mode : string or `Mode` instance
        Compilation mode passed to Theano functions.
    progressbar : bool
        Whether or not to display a progress bar in the command line. The
        bar shows the percentage of completion, the sampling speed in
        samples per second (SPS), the estimated remaining time until
        completion ("expected time of arrival"; ETA), and the current ELBO.

    Returns
    -------
    ADVIFit
        Named tuple, which includes 'means', 'stds', and 'elbo_vals'.
        'means' is the mean, 'stds' is the standard deviation, and
        'elbo_vals' is the trace of ELBO values during optimization.

    References
    ----------
    .. [KTR+2016] Kucukelbir, A., Tran, D., Ranganath, R., Gelman, A.,
        and Blei, D. M. (2016). Automatic Differentiation Variational
        Inference. arXiv preprint arXiv:1603.00788.
    """
    model = pm.modelcontext(model)
    if start is None:
        start = model.test_point

    if vars is None:
        vars = model.vars
    vars = pm.inputvars(vars)

    if len(vars) == 0:
        raise ValueError('No free random variables to fit.')

    if not pm.model.all_continuous(vars):
        raise ValueError('Model cannot include discrete RVs for ADVI.')

    n_mcsamples = 100 if accurate_elbo else 1

    # Prepare optimizer
    if optimizer is None:
        optimizer = adagrad_optimizer(learning_rate, epsilon)

    # Create variational gradient tensor
    elbo, shared = _calc_elbo(vars,
                              model,
                              n_mcsamples=n_mcsamples,
                              random_seed=random_seed)

    # Set starting values
    for var, share in shared.items():
        share.set_value(start[str(var)])

    order = pm.ArrayOrdering(vars)
    bij = pm.DictToArrayBijection(order, start)
    u_start = bij.map(start)
    w_start = np.zeros_like(u_start)
    uw = np.concatenate([u_start, w_start])

    # Create parameter update function used in the training loop
    uw_shared = theano.shared(uw, 'uw_shared')
    elbo = pm.CallableTensor(elbo)(uw_shared)
    updates = optimizer(loss=-1 * elbo, param=[uw_shared])
    f = theano.function([], [uw_shared, elbo], updates=updates, mode=mode)

    # For tracking convergence of ELBO
    window_size = int(max(0.1 * n // eval_elbo, 2.0))
    circ_buff = deque([], maxlen=window_size)

    # Optimization loop
    elbos = np.empty(n)
    divergence_flag = False
    progress = trange(n) if progressbar else range(n)
    try:
        uw_i, elbo_current = f()
        if np.isnan(elbo_current):
            raise FloatingPointError('NaN occurred in ADVI optimization.')
        for i in progress:
            uw_i, e = f()
            if np.isnan(e):
                raise FloatingPointError('NaN occurred in ADVI optimization.')
            elbos[i] = e

            if progressbar:
                if n < 10:
                    progress.set_description('ELBO = {:,.5g}'.format(elbos[i]))
                elif i % (n // 10) == 0 and i > 0:
                    avg_elbo = infmean(elbos[i - n // 10:i])
                    progress.set_description(
                        'Average ELBO = {:,.5g}'.format(avg_elbo))

            if i % eval_elbo == 0:
                elbo_prev = elbo_current
                elbo_current = elbos[i]
                delta_elbo = abs((elbo_current - elbo_prev) / elbo_prev)
                circ_buff.append(delta_elbo)
                avg_delta = np.mean(circ_buff)
                med_delta = np.median(circ_buff)

                if i > 0 and avg_delta < tol_obj:
                    pm._log.info('Mean ELBO converged.')
                    elbos = elbos[:(i + 1)]
                    break
                elif i > 0 and med_delta < tol_obj:
                    pm._log.info('Median ELBO converged.')
                    elbos = elbos[:(i + 1)]
                    break
                if i > 10 * eval_elbo:
                    if med_delta > 0.5 or avg_delta > 0.5:
                        divergence_flag = True
                    else:
                        divergence_flag = False

    except KeyboardInterrupt:
        elbos = elbos[:i]
        if n < 10:
            pm._log.info(
                'Interrupted at {:,d} [{:.0f}%]: ELBO = {:,.5g}'.format(
                    i, 100 * i // n, elbos[i]))
        else:
            avg_elbo = infmean(elbos[i - n // 10:i])
            pm._log.info(
                'Interrupted at {:,d} [{:.0f}%]: Average ELBO = {:,.5g}'.
                format(i, 100 * i // n, avg_elbo))
    else:
        if n < 10:
            pm._log.info('Finished [100%]: ELBO = {:,.5g}'.format(elbos[-1]))
        else:
            avg_elbo = infmean(elbos[-n // 10:])
            pm._log.info(
                'Finished [100%]: Average ELBO = {:,.5g}'.format(avg_elbo))
    finally:
        if progressbar:
            progress.close()

    if divergence_flag:
        pm._log.info('Evidence of divergence detected, inspect ELBO.')

    # Estimated parameters
    l = int(uw_i.size / 2)
    u = bij.rmap(uw_i[:l])
    w = bij.rmap(uw_i[l:])
    # w is in log space
    for var in w.keys():
        w[var] = np.exp(w[var])

    return ADVIFit(u, w, elbos)
Example #10
    def bijection(self):
        return pm.DictToArrayBijection(
            pm.ArrayOrdering(pm.inputvars(self.model.cont_vars)),
            self.model.test_point)
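
A sketch of how a bijection property like this might be used; the wrapper class and toy model below are assumptions for illustration, not part of the example above:

import pymc3 as pm

class ModelWrapper:
    def __init__(self, model):
        self.model = model

    @property
    def bijection(self):
        return pm.DictToArrayBijection(
            pm.ArrayOrdering(pm.inputvars(self.model.cont_vars)),
            self.model.test_point)

with pm.Model() as model:
    pm.Normal('x', mu=0., sd=1.)

wrapper = ModelWrapper(model)
flat = wrapper.bijection.map(model.test_point)   # dict point -> flat array
point = wrapper.bijection.rmap(flat)             # flat array -> dict point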