コード例 #1
0
ファイル: mcmc_diagnostics.py プロジェクト: EricHughesABC/MOT
def univariate_ess(samples, method='standard_error', **kwargs):
    r"""Compute the univariate Effective Sample Size (ESS) for each problem and parameter.

    The ESS is defined as:

        .. math::

            ESS(X) = n * \frac{\lambda^{2}}{\sigma^{2}}

        Here :math:`\lambda` denotes the standard deviation of the chain, and :math:`\sigma` is estimated with the
        Monte Carlo standard error (which by default is itself estimated using a batch means estimator).

    Args:
        samples (ndarray, dict or generator): one of three layouts: a matrix of shape (d, p, n) holding
            d problems, p parameters and n samples; a dictionary holding a (d, n) matrix per parameter;
            or a generator function yielding sample arrays of shape (p, n).
        method (str): either 'autocorrelation' or 'standard_error', the default being 'standard_error'.
            With 'autocorrelation' the function :func:`estimate_univariate_ess_autocorrelation` is applied,
            with 'standard_error' the function :func:`estimate_univariate_ess_standard_error` is applied.
        **kwargs: forwarded to the selected compute method

    Returns:
        ndarray: a matrix of size (d, p) containing an ESS for every problem and every parameter.

    References:
        * Flegal, J.M., Haran, M., and Jones, G.L. (2008). "Markov chain Monte Carlo: Can We
          Trust the Third Significant Figure?". Statistical Science, 23, p. 250-260.
        * Marc S. Meketon and Bruce Schmeiser. 1984. Overlapping batch means: something for nothing?.
          In Proceedings of the 16th conference on Winter simulation (WSC '84), Sallie Sheppard (Ed.).
          IEEE Press, Piscataway, NJ, USA, 226-230.
    """
    # Normalise whichever input layout was given into a callable producing the per-problem sample arrays.
    make_problem_samples = _get_sample_generator(samples)

    # The worker object is called once for every item produced by the sample generator.
    ess_worker = _UnivariateESSMultiProcessing(method, **kwargs)

    per_problem_ess = multiprocess_mapping(ess_worker, make_problem_samples())
    return np.array(per_problem_ess)
コード例 #2
0
ファイル: mcmc_diagnostics.py プロジェクト: EricHughesABC/MOT
def multivariate_ess(samples, batch_size_generator=None):
    r"""Compute the multivariate Effective Sample Size (ESS) for each problem.

    In essence this runs :func:`estimate_multivariate_ess` on the samples of every problem.

    Args:
        samples (ndarray, dict or generator): one of three layouts: a matrix of shape (d, p, n) holding
            d problems, p parameters and n samples; a dictionary holding a (d, n) matrix per parameter;
            or a generator function yielding sample arrays of shape (p, n).
        batch_size_generator (MultiVariateESSBatchSizeGenerator): the batch size generator, which determines
            the number of batches and the batch sizes used when estimating the minimum ESS.

    Returns:
        ndarray: the multivariate ESS per problem
    """
    # Normalise whichever input layout was given into a callable producing the per-problem sample arrays.
    make_problem_samples = _get_sample_generator(samples)

    # The worker object is called once for every item produced by the sample generator.
    ess_worker = _MultivariateESSMultiProcessing(batch_size_generator)

    per_problem_ess = multiprocess_mapping(ess_worker, make_problem_samples())
    return np.array(per_problem_ess)
コード例 #3
0
def fit_truncated_gaussian(samples, lower_bounds, upper_bounds):
    """Fit a truncated Gaussian distribution to the given samples.

    This performs a maximum likelihood estimation of a truncated Gaussian on the provided samples, with the
    truncation points given by the lower and upper bounds.

    Args:
        samples (ndarray): a one or two dimensional array. If one dimensional we fit the truncated Gaussian on all
            values. If two dimensional, we fit a truncated Gaussian for every set of samples over the
            first dimension.
        lower_bounds (ndarray or float): the lower bound, either a scalar or a lower bound per problem (first index of
            samples)
        upper_bounds (ndarray or float): the upper bound, either a scalar or an upper bound per problem (first index of
            samples)

    Returns:
        mean, std: the mean and std of the fitted truncated Gaussian. For two dimensional input these are two
            arrays with one entry per problem; both are empty when ``samples`` holds no problems.
    """
    if samples.ndim == 1:
        # Single problem: fit directly, no need to go through the multiprocessing mapper.
        return _TruncatedNormalFitter()((samples, lower_bounds, upper_bounds))

    nmr_problems = samples.shape[0]
    if nmr_problems == 0:
        # Without problems the mapped results form an empty 1d array, on which the column indexing
        # below raises an IndexError. Return empty mean and std arrays instead.
        return np.array([]), np.array([])

    # Whether a bound is scalar does not change between problems, so decide it once up front.
    lower_is_scalar = is_scalar(lower_bounds)
    upper_is_scalar = is_scalar(upper_bounds)

    def item_generator():
        # Yields one (samples, lower bound, upper bound) work item per problem.
        for ind in range(nmr_problems):
            lower_bound = lower_bounds if lower_is_scalar else lower_bounds[ind]
            upper_bound = upper_bounds if upper_is_scalar else upper_bounds[ind]
            yield (samples[ind], lower_bound, upper_bound)

    results = np.array(
        multiprocess_mapping(_TruncatedNormalFitter(), item_generator()))

    # The columns of the results hold, respectively, the mean and the std per problem.
    return results[:, 0], results[:, 1]
コード例 #4
0
def gaussian_overlapping_coefficient(means_0,
                                     stds_0,
                                     means_1,
                                     stds_1,
                                     lower=None,
                                     upper=None):
    r"""Compute the overlapping coefficient of two Gaussian continuous distributions.

    This computes :math:`\int_{lower}^{upper}{\min(f(x), g(x)) \, dx}` where
    :math:`f \sim \mathcal{N}(\mu_0, \sigma_0^{2})` and :math:`g \sim \mathcal{N}(\mu_1, \sigma_1^{2})` are normally
    distributed variables. By default the integration runs from :math:`-\infty` to :math:`\infty`.

    This will compute the overlap for each element in the first dimension.

    Args:
        means_0 (ndarray): the set of means of the first distribution
        stds_0 (ndarray): the set of stds of the first distribution
        means_1 (ndarray): the set of means of the second distribution
        stds_1 (ndarray): the set of stds of the second distribution
        lower (float): the lower limit of the integration. If not set we set it to -inf.
        upper (float): the upper limit of the integration. If not set we set it to +inf.

    Returns:
        ndarray: the computed overlap for every element along the first dimension of the inputs.
    """
    if lower is None:
        lower = -np.inf
    if upper is None:
        upper = np.inf

    def point_iterator():
        # Yields one (mean_0, std_0, mean_1, std_1) work item per element of the first dimension,
        # with singleton dimensions squeezed away.
        for ind in range(means_0.shape[0]):
            yield (np.squeeze(means_0[ind]),
                   np.squeeze(stds_0[ind]),
                   np.squeeze(means_1[ind]),
                   np.squeeze(stds_1[ind]))

    return np.array(
        list(
            multiprocess_mapping(_ComputeGaussianOverlap(lower, upper),
                                 point_iterator())))