def univariate_ess(samples, method='standard_error', **kwargs):
    r"""Estimate the univariate Effective Sample Size for the samples of every problem.

    This computes the ESS using:

    .. math::

        ESS(X) = n \cdot \frac{\lambda^{2}}{\sigma^{2}}

    where :math:`\lambda` is the standard deviation of the chain and :math:`\sigma` is estimated using
    the Monte Carlo standard error (which in turn is, by default, estimated using a batch means estimator).

    Args:
        samples (ndarray, dict or generator): either a matrix of shape (d, p, n) with d problems, p parameters
            and n samples, or a dictionary with for every parameter a matrix with shape (d, n), or, finally,
            a generator function that yields sample arrays of shape (p, n).
        method (str): one of 'autocorrelation' or 'standard_error', defaults to 'standard_error'.
            If 'autocorrelation' is chosen we apply the function :func:`estimate_univariate_ess_autocorrelation`,
            if 'standard_error' is chosen we apply the function :func:`estimate_univariate_ess_standard_error`.
        **kwargs: passed to the chosen compute method

    Returns:
        ndarray: a matrix of size (d, p) with for every problem and every parameter an ESS.

    References:
        * Flegal, J.M., Haran, M., and Jones, G.L. (2008). "Markov chain Monte Carlo: Can We Trust the
          Third Significant Figure?". Statistical Science, 23, p. 250-260.
        * Marc S. Meketon and Bruce Schmeiser. 1984. Overlapping batch means: something for nothing?
          In Proceedings of the 16th conference on Winter simulation (WSC '84), Sallie Sheppard (Ed.).
          IEEE Press, Piscataway, NJ, USA, 226-230.
    """
    samples_generator = _get_sample_generator(samples)
    return np.array(multiprocess_mapping(_UnivariateESSMultiProcessing(method, **kwargs), samples_generator()))
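
# A minimal usage sketch (illustrative only; it assumes NumPy is imported as ``np`` and that the
# chains fit in memory as a single (d, p, n) array):
#
#     chains = np.random.normal(size=(10, 3, 2000))   # 10 problems, 3 parameters, 2000 samples each
#     ess = univariate_ess(chains, method='standard_error')
#     # ess.shape == (10, 3): one ESS estimate per problem and per parameter
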
def multivariate_ess(samples, batch_size_generator=None):
    r"""Estimate the multivariate Effective Sample Size for the samples of every problem.

    This essentially applies :func:`estimate_multivariate_ess` to every problem.

    Args:
        samples (ndarray, dict or generator): either a matrix of shape (d, p, n) with d problems, p parameters
            and n samples, or a dictionary with for every parameter a matrix with shape (d, n), or, finally,
            a generator function that yields sample arrays of shape (p, n).
        batch_size_generator (MultiVariateESSBatchSizeGenerator): the batch size generator, tells us how many
            batches and of which size we use in estimating the minimum ESS.

    Returns:
        ndarray: the multivariate ESS per problem
    """
    samples_generator = _get_sample_generator(samples)
    return np.array(multiprocess_mapping(_MultivariateESSMultiProcessing(batch_size_generator), samples_generator()))
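
# A minimal usage sketch (illustrative only; with ``batch_size_generator=None`` the module's default
# batch size generator is used):
#
#     chains = np.random.normal(size=(10, 3, 2000))
#     mv_ess = multivariate_ess(chains)
#     # mv_ess.shape == (10,): a single multivariate ESS value per problem
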
def fit_truncated_gaussian(samples, lower_bounds, upper_bounds):
    """Fits a truncated Gaussian distribution on the given samples.

    This will do a maximum likelihood estimation of a truncated Gaussian on the provided samples, with the
    truncation points given by the lower and upper bounds.

    Args:
        samples (ndarray): a one or two dimensional array. If one dimensional, we fit the truncated Gaussian on
            all values. If two dimensional, we calculate the truncated Gaussian for every set of samples over
            the first dimension.
        lower_bounds (ndarray or float): the lower bound, either a scalar or a lower bound per problem
            (first index of samples)
        upper_bounds (ndarray or float): the upper bound, either a scalar or an upper bound per problem
            (first index of samples)

    Returns:
        mean, std: the mean and std of the fitted truncated Gaussian
    """
    if len(samples.shape) == 1:
        return _TruncatedNormalFitter()((samples, lower_bounds, upper_bounds))

    def item_generator():
        for ind in range(samples.shape[0]):
            if is_scalar(lower_bounds):
                lower_bound = lower_bounds
            else:
                lower_bound = lower_bounds[ind]

            if is_scalar(upper_bounds):
                upper_bound = upper_bounds
            else:
                upper_bound = upper_bounds[ind]

            yield (samples[ind], lower_bound, upper_bound)

    results = np.array(multiprocess_mapping(_TruncatedNormalFitter(), item_generator()))
    return results[:, 0], results[:, 1]
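
# A minimal usage sketch (illustrative only; the bounds 0 and 1 below are arbitrary example values):
#
#     samples = np.clip(np.random.normal(0.5, 0.1, size=(10, 2000)), 0, 1)
#     means, stds = fit_truncated_gaussian(samples, 0, 1)
#     # means and stds each have shape (10,): one fit per problem (first axis of ``samples``)
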
def gaussian_overlapping_coefficient(means_0, stds_0, means_1, stds_1, lower=None, upper=None):
    r"""Compute the overlapping coefficient of two Gaussian distributions.

    This computes :math:`\int_{-\infty}^{\infty}{\min(f(x), g(x))\,\mathrm{d}x}` where
    :math:`f \sim \mathcal{N}(\mu_0, \sigma_0^{2})` and :math:`g \sim \mathcal{N}(\mu_1, \sigma_1^{2})`
    are normally distributed variables.

    This will compute the overlap for each element in the first dimension.

    Args:
        means_0 (ndarray): the set of means of the first distribution
        stds_0 (ndarray): the set of stds of the first distribution
        means_1 (ndarray): the set of means of the second distribution
        stds_1 (ndarray): the set of stds of the second distribution
        lower (float): the lower limit of the integration. If not set we set it to -inf.
        upper (float): the upper limit of the integration. If not set we set it to +inf.

    Returns:
        ndarray: the overlapping coefficient for every element in the first dimension
    """
    if lower is None:
        lower = -np.inf
    if upper is None:
        upper = np.inf

    def point_iterator():
        for ind in range(means_0.shape[0]):
            yield np.squeeze(means_0[ind]), np.squeeze(stds_0[ind]), np.squeeze(means_1[ind]), np.squeeze(stds_1[ind])

    return np.array(list(multiprocess_mapping(_ComputeGaussianOverlap(lower, upper), point_iterator())))
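
# A minimal usage sketch (illustrative only; the means and stds below are arbitrary example values):
#
#     means_0, stds_0 = np.array([0.0, 1.0]), np.array([1.0, 1.0])
#     means_1, stds_1 = np.array([0.5, 3.0]), np.array([1.0, 2.0])
#     overlap = gaussian_overlapping_coefficient(means_0, stds_0, means_1, stds_1)
#     # overlap[i] is the integral of min(f, g) for the i-th pair of Gaussians, a value in [0, 1]
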