def test_logpdf(self):
    """
    Test the log-pdf at zero for a distribution of seeded uniform samples.
    """
    rng = np.random.RandomState(1)
    samples = uniform.rvs(loc=0, scale=1, size=100, random_state=rng)
    distro = Distribution(samples)
    observed = distro.logpdf(0)
    expected = np.log([0.5805])
    assert_almost_equal(observed, expected, decimal=4)
def test_str_normal(self):
    """
    Test the string form of a narrow, seeded normal distribution.
    """
    rng = np.random.RandomState(2)
    samples = norm.rvs(loc=2, scale=0.0001, size=100, random_state=rng)
    text = Distribution(samples).__str__()
    assert_equal(text, '(2.000+/-0.000)e0')
def test_negative_pdf(self):
    """
    Test the negative pdf at zero for a distribution of seeded uniform samples.
    """
    rng = np.random.RandomState(1)
    samples = uniform.rvs(loc=0, scale=1, size=100, random_state=rng)
    observed = Distribution(samples).negative_pdf(0)
    assert_almost_equal(observed, [-0.5805], decimal=4)
def test_str_normal(self):
    """
    Test the string form of a seeded standard normal distribution.
    """
    rng = np.random.RandomState(2)
    samples = norm.rvs(loc=0, scale=1, size=100, random_state=rng)
    text = Distribution(samples).__str__()
    assert_equal(text, '(-6.537+/-175.337)e-2')
def test_repr_uniform_diff_precision(self):
    """
    Test the repr of seeded uniform samples when a precision of 2 is requested.
    """
    rng = np.random.RandomState(2)
    samples = uniform.rvs(loc=0, scale=1, size=100, random_state=rng)
    text = Distribution(samples).__repr__(precision=2)
    assert_equal(text, '(4.36(+5.35/-4.10))e-1')
def test_repr_uniform(self):
    """
    Test the default repr of a distribution of seeded uniform samples.
    """
    rng = np.random.RandomState(2)
    samples = uniform.rvs(loc=0, scale=1, size=100, random_state=rng)
    text = Distribution(samples).__repr__()
    assert_equal(text, '(4.364(+5.354/-4.098))e-1')
def test_repr_normal_diff_precision(self):
    """
    Test the repr of seeded normal samples when a precision of 2 is requested.
    """
    rng = np.random.RandomState(2)
    samples = norm.rvs(loc=0, scale=1, size=100, random_state=rng)
    text = Distribution(samples).__repr__(precision=2)
    assert_equal(text, '(-6.54+/-175.34)e-2')
def test_add_samples(self):
    """
    Test that adding the same 100 samples again doubles the distribution size.
    """
    rng = np.random.RandomState(2)
    draws = norm.rvs(loc=0, scale=2, size=100, random_state=rng)
    distro = Distribution(draws)
    distro.add_samples(draws)
    assert_equal(distro.size, 200)
def __init__(self, function, abscissa, ordinate, bounds=None, ordinate_error=None):
    """
    Initialisation function for a :py:class:`~uravu.relationship.Relationship` object.

    Args:
        function (:py:attr:`callable`): The function stored as :py:attr:`function`.
        abscissa (:py:attr:`array_like`): The abscissa data; its first dimension must match the length of the ordinate.
        ordinate (:py:attr:`list`): One entry per data point. Each entry may be a :py:class:`~uravu.distribution.Distribution`, a frozen :py:mod:`scipy.stats` distribution (sampled 5000 times), or a plain value (requires ``ordinate_error``).
        bounds (:py:attr:`tuple`, optional): One ``(lower, upper)`` tuple per parameter, used to draw uniform starting samples for each variable. Default is :py:attr:`None`.
        ordinate_error (:py:attr:`array_like`, optional): Per-point standard deviations, used to build normal distributions around plain ordinate values. Default is :py:attr:`None`.

    Raises:
        ValueError: If a plain ordinate value is given without ``ordinate_error``, if the abscissa and ordinate lengths differ, or if the bounds do not match the number of parameters.
    """
    self.function = function
    self.abscissa = abscissa

    # Convert entry by entry so that a mixed ordinate keeps every data
    # point, and wrap the result in an Axis exactly once after the loop.
    converted = []
    only_distributions = True
    for i, y in enumerate(ordinate):
        if isinstance(y, Distribution):
            converted.append(y)
            continue
        only_distributions = False
        if isinstance(y, stats._distn_infrastructure.rv_frozen):
            converted.append(Distribution(y.rvs(size=5000)))
        elif ordinate_error is not None:
            converted.append(
                Distribution(
                    stats.norm.rvs(loc=y, scale=ordinate_error[i], size=5000)))
        else:
            raise ValueError(
                "uravu ordinate should be a list of uravu.distribution.Distribution objects or an ordinate_error should be given."
            )
    # Nothing to wrap for an empty ordinate; the length check below still
    # applies. When every entry is already a Distribution the caller's own
    # container is handed to the Axis untouched.
    if converted:
        self.ordinate = Axis(ordinate if only_distributions else converted)

    if abscissa.shape[0] != len(ordinate):
        raise ValueError(
            "The number of data points in the abscissa does not match that for the ordinate."
        )

    self.bounds = bounds
    self.variables = []
    if bounds is not None:
        if len(self.bounds) != self.len_parameters or not isinstance(
                bounds[0], tuple):
            raise ValueError(
                "The number of bounds does not match the number of parameters"
            )
        for b in self.bounds:
            self.variables.append(
                Distribution(
                    stats.uniform.rvs(loc=b[0], scale=b[1] - b[0], size=500)))
    else:
        # Without bounds every variable starts as a single-valued distribution.
        for _ in range(self.len_parameters):
            self.variables.append(Distribution(1))

    self.ln_evidence = None
    self.mcmc_results = None
    self.nested_sampling_results = None
def test_not_normal(self):
    """
    Test that seeded uniform samples are not flagged as normal.
    """
    rng = np.random.RandomState(1)
    samples = uniform.rvs(loc=0, scale=1, size=100, random_state=rng)
    assert_equal(Distribution(samples).normal, False)
def test_v_uniform(self):
    """
    Test that the ``v`` attribute is None for seeded uniform samples.
    """
    rng = np.random.RandomState(2)
    samples = uniform.rvs(loc=0, scale=2, size=100, random_state=rng)
    assert_equal(Distribution(samples).v, None)
def test_v(self):
    """
    Test that ``v`` is close to 4 for seeded normal samples drawn with scale 2.
    """
    rng = np.random.RandomState(2)
    samples = norm.rvs(loc=0, scale=2, size=100, random_state=rng)
    assert_almost_equal(Distribution(samples).v, 4, decimal=0)
def test_dist_max(self):
    """
    Test that ``dist_max`` is close to zero for seeded standard normal samples.
    """
    rng = np.random.RandomState(1)
    samples = norm.rvs(loc=0, scale=1, size=100, random_state=rng)
    assert_almost_equal(Distribution(samples).dist_max, 0, decimal=1)
def test_normal_latex(self):
    """
    Test the latex string produced for a normal distribution.
    """
    # The global seed must be set before drawing: the expected string
    # depends on the exact samples.
    np.random.seed(1)
    samples = scipy.stats.norm.rvs(loc=0, scale=0.5, size=2000)
    rendered = utils.latex(Distribution(samples))
    assert_equal(rendered, r'$1.725e-02\pm{9.665e-01}$')
def test_con_int(self):
    """
    Test that the confidence interval of seeded normal samples (scale 2) is close to -/+ 2 * 1.96.
    """
    rng = np.random.RandomState(2)
    samples = norm.rvs(loc=0, scale=2, size=10000, random_state=rng)
    interval = Distribution(samples).con_int
    assert_almost_equal(interval[0], -2 * 1.96, decimal=1)
    assert_almost_equal(interval[1], 2 * 1.96, decimal=1)
def test_init_kde_size_change(self):
    """
    Test that an Axis keeps the samples of a distribution whose size differs from its neighbour.
    """
    rng = np.random.RandomState(2)
    smaller = Distribution(
        norm.rvs(loc=1, scale=1, size=1000, random_state=rng))
    axis = Axis([DISTRO1, smaller])
    assert_equal(axis.values[1].samples, smaller.samples)
def test_not_normal_latex(self):
    """
    Test the latex string produced for a non-normal distribution.
    """
    # The global seed must be set before drawing: the expected string
    # depends on the exact samples.
    np.random.seed(1)
    samples = scipy.stats.lognorm.rvs(loc=10, scale=0.5, s=10, size=2000)
    rendered = utils.latex(Distribution(samples))
    assert_equal(rendered, r'$1.071e+01^{+1.754e+08}_{-7.060e-01}$')
def max_likelihood(self, method, x0=None, **kwargs):
    """
    Find the variable values that maximise the likelihood of the :py:class:`~uravu.relationship.Relationship` and store each one as a single-valued distribution in :py:attr:`variables`. Keyword arguments are described in the :func:`scipy.optimize.minimize()` documentation.

    Args:
        method: Forwarded unchanged to :func:`optimize.max_ln_likelihood`.
        x0 (:py:attr:`array_like`, optional): Initial guess values for the parameters. Default is :py:attr:`None`.
    """
    best_fit = optimize.max_ln_likelihood(self, method, x0, **kwargs)
    for index, value in enumerate(best_fit):
        self.variables[index] = Distribution(value)
def mcmc(relationship, prior_function=None, walkers=50, n_samples=500, n_burn=500, progress=True):
    """
    Run MCMC sampling to obtain the probability distributions for the variables of a relationship.

    Args:
        relationship (:py:class:`uravu.relationship.Relationship`): The relationship to determine the posteriors of.
        prior_function (:py:attr:`callable`, optional): Callable that returns the prior distributions. Default is :func:`uravu.relationship.Relationship.prior()`.
        walkers (:py:attr:`int`, optional): Number of MCMC walkers. Default is :py:attr:`50`.
        n_samples (:py:attr:`int`, optional): Number of sample points. Default is :py:attr:`500`.
        n_burn (:py:attr:`int`, optional): Number of burn in samples. Default is :py:attr:`500`.
        progress (:py:attr:`bool`, optional): Show tqdm progress for sampling. Default is :py:attr:`True`.

    Returns:
        :py:attr:`dict`: Dictionary with the distributions as a list (:py:attr:`'distributions'`), the chain (:py:attr:`'chain'`) and the samples as an :py:attr:`array_like` (:py:attr:`'samples'`).
    """
    if prior_function is None:
        prior_function = relationship.prior

    medians = relationship.variable_medians
    ndims = len(medians)
    called_prior = prior_function()

    # Start each walker close to the current median of every variable; a
    # median of exactly zero gets a small absolute jitter instead of a
    # relative one.
    initial_prior = np.zeros((walkers, ndims))
    for column, median in enumerate(medians):
        if median != 0:
            initial_prior[:, column] = (
                median + 1e-2 * np.random.randn(walkers) * median)
        else:
            initial_prior[:, column] = 1e-4 * np.random.randn(walkers)

    sampler = emcee.EnsembleSampler(
        walkers,
        ndims,
        ln_probability,
        args=[
            relationship.function,
            relationship.abscissa,
            relationship.ordinate,
            called_prior,
        ])
    sampler.run_mcmc(initial_prior, n_samples + n_burn, progress=progress)

    # Post burn-in samples, flattened to one row per draw.
    post_samples = sampler.get_chain(discard=n_burn).reshape((-1, ndims))
    distributions = [
        Distribution(post_samples[:, column]) for column in range(ndims)
    ]
    return {
        "distributions": distributions,
        "chain": sampler.get_chain().reshape((-1, ndims)),
        "samples": post_samples,
    }
def test_mcmc_with_variable_median_zero(self):
    """
    Test MCMC when the first variable has a median of exactly zero.
    """
    test_rel = relationship.Relationship(utils.straight_line, TEST_X, TEST_Y)
    test_rel.variables[0] = Distribution(np.zeros((7)))
    actual_results = sampling.mcmc(test_rel, n_burn=10, n_samples=10)
    posteriors = actual_results["distributions"]
    for index in (0, 1):
        assert_equal(isinstance(posteriors[index], Distribution), True)
    for index in (0, 1):
        assert_equal(posteriors[index].size, 500)
def _sample_until_normal(array, n_samples, n_resamples, max_resamples, confidence_interval):
    """
    Resample from the distribution until a normal distribution is obtained or a maximum is reached.

    A warning is issued only when resampling stops while the distribution is
    still not normal.

    Args:
        array (:py:attr:`array_like`): The array to sample from.
        n_samples (:py:attr:`int`): Number of samples.
        n_resamples (:py:attr:`int`): Number of resamples to perform initially.
        max_resamples (:py:attr:`int`): The maximum number of resamples to perform.
        confidence_interval (:py:attr:`array_like`): The percentile points of the distribution that should be stored.

    Returns:
        :py:class:`uravu.distribution.Distribution`: The resampled distribution.
    """
    distro = Distribution(_bootstrap(array.flatten(), n_samples, n_resamples), ci_points=confidence_interval)
    # Keep the outcome of the most recent normality check so that the warning
    # below reflects the check that actually ended the loop.
    is_normal = distro.normal
    while not is_normal and distro.size < max_resamples:
        distro.add_samples(_bootstrap(array.flatten(), n_samples, 100))
        is_normal = distro.normal
    # Reaching the sample limit is only a problem if normality was not achieved.
    if not is_normal:
        warnings.warn("The maximum number of resamples has been reached, and the distribution is not yet normal.")
    return distro
def diffusion(self, n_samples=10000, fit_intercept=True):
    """
    Estimate the diffusion coefficient by fitting straight lines to samples of the mean-squared displacement.

    Only data from the index of the largest non-Gaussian parameter onwards is
    used. Samples are drawn from a multivariate normal built from the sampled
    MSD and its covariance, and each sample is fitted by generalised least
    squares. The gradients divided by 6 are stored in
    :py:attr:`diffusion_coefficient`; when the intercept is fitted it is stored
    in :py:attr:`intercept`.

    Args:
        n_samples (:py:attr:`int`, optional): The number of samples in the random generator. Default is :py:attr:`10000`.
        fit_intercept (:py:attr:`bool`, optional): Should the intercept of the diffusion relationship be fit. Default is :py:attr:`True`.
    """
    start = np.argmax(self.ngp)
    dt = self.dt[start:]

    cov = corr2cov(self.correlation_matrix, self.msd_sampled_std[start:])
    single_msd = multivariate_normal(self.msd_sampled[start:], cov, allow_singular=True)
    Y = single_msd.rvs(n_samples).T

    # Design matrix: a column of ones is added only when an intercept is fitted.
    if fit_intercept:
        A = np.array([np.ones(dt.size), dt]).T
    else:
        A = np.array([dt]).T

    # Generalised least squares: (A^T C^-1 A)^-1 A^T C^-1 Y.
    inv_cov = np.linalg.inv(cov)
    normal_matrix = np.matmul(A.T, np.matmul(inv_cov, A))
    projected = np.matmul(A.T, np.matmul(inv_cov, Y))
    straight_line = np.matmul(np.linalg.inv(normal_matrix), projected)

    if fit_intercept:
        intercept, gradient = straight_line
        self.diffusion_coefficient = Distribution(gradient / 6, ci_points=self.confidence_interval)
        self.intercept = Distribution(intercept, ci_points=self.confidence_interval)
    else:
        self.diffusion_coefficient = Distribution(straight_line[0] / 6, ci_points=self.confidence_interval)
def __init__(self, delta_t, disp_3d, n_resamples=1000, sub_sample_dt=1, confidence_interval=None, max_resamples=10000, bootstrap_multiplier=1, progress=True, ngp_errors=False):
    """
    Bootstrap the mean-squared displacement (MSD) and non-Gaussian parameter for each entry of :py:attr:`displacements`, then compute a correlation matrix of the per-row mean squared displacements.

    Args:
        delta_t: Forwarded to the parent initialiser.
        disp_3d: Forwarded to the parent initialiser.
        n_resamples (:py:attr:`int`, optional): Initial number of resamples passed to :func:`_sample_until_normal`. Default is :py:attr:`1000`.
        sub_sample_dt (:py:attr:`int`, optional): Forwarded to the parent initialiser. Default is :py:attr:`1`.
        confidence_interval (optional): Forwarded to the parent initialiser. Default is :py:attr:`None`.
        max_resamples (:py:attr:`int`, optional): Maximum number of resamples passed to :func:`_sample_until_normal`. Default is :py:attr:`10000`.
        bootstrap_multiplier (:py:attr:`int`, optional): Passed to :func:`_n_samples` when choosing the bootstrap sample count. Default is :py:attr:`1`.
        progress (:py:attr:`bool`, optional): Forwarded to the parent initialiser. Default is :py:attr:`True`.
        ngp_errors (:py:attr:`bool`, optional): If true, the non-Gaussian parameter is estimated from bootstrapped distributions and :py:attr:`ngp_err` is populated. Default is :py:attr:`False`.
    """
    # NOTE(review): self.displacements, self.iterator, self.max_obs,
    # self.delta_t, self.dt, self.distributions, self.distributions_4 and
    # self.confidence_interval are not created in this method; presumably the
    # parent initialiser sets them up -- confirm.
    super().__init__(delta_t, disp_3d, sub_sample_dt, confidence_interval, progress)
    self.msd_observed = np.array([])
    self.msd_sampled = np.array([])
    self.msd_sampled_err = np.array([])
    self.msd_sampled_std = np.array([])
    self.ngp = np.array([])
    if ngp_errors:
        self.ngp_err = np.array([])
    self.euclidian_displacements = []
    # One column per entry of self.displacements, one row per element along
    # the first axis of the first displacement array.
    samples = np.zeros((self.displacements[0].shape[0], len(self.displacements)))
    for i in self.iterator:
        # Squared displacement magnitude: sum of squares over axis 2
        # (presumably the Cartesian components -- TODO confirm).
        d_squared = np.sum(self.displacements[i] ** 2, axis=2)
        self.euclidian_displacements.append(Distribution(np.sqrt(d_squared.flatten())))
        samples[:, i] = d_squared.mean(axis=1).flatten()
        n_samples_msd = _n_samples(self.displacements[i].shape, self.max_obs, bootstrap_multiplier)
        # Too few samples to bootstrap: nothing further is recorded for this
        # entry (the `samples` column above has already been filled).
        if n_samples_msd <= 1:
            continue
        self.msd_observed = np.append(self.msd_observed, np.mean(d_squared.flatten()))
        distro = _sample_until_normal(d_squared, n_samples_msd, n_resamples, max_resamples, self.confidence_interval)
        if ngp_errors:
            # Bootstrap the fourth power as well and combine 1000 random
            # draws from each distribution into 3 * <d^4> / (5 * <d^2>^2) - 1.
            distro4 = _sample_until_normal(d_squared * d_squared, n_samples_msd, n_resamples, max_resamples, self.confidence_interval)
            self.distributions_4.append(distro4)
            top = distro4.samples[np.random.choice(distro4.size, size=1000)] * 3
            bottom = np.square(distro.samples[np.random.choice(distro.size, size=1000)]) * 5
            ngp_d = Distribution(top / bottom - 1, ci_points=self.confidence_interval)
            self.ngp = np.append(self.ngp, ngp_d.n)
            # NOTE(review): the error is taken from distro4 rather than from
            # ngp_d -- confirm this is intended.
            self.ngp_err = np.append(self.ngp_err, distro4.n - distro4.con_int[0])
        else:
            # Point estimate of the same expression from the raw data.
            top = np.mean(d_squared.flatten() * d_squared.flatten()) * 3
            bottom = np.square(np.mean(d_squared.flatten())) * 5
            self.ngp = np.append(self.ngp, top / bottom - 1)
        self.dt = np.append(self.dt, self.delta_t[i])
        self.distributions.append(distro)
        self.msd_sampled = np.append(self.msd_sampled, distro.n)
        # Distance from the distribution's `n` value to its first stored
        # confidence-interval point.
        self.msd_sampled_err = np.append(self.msd_sampled_err, distro.n - distro.con_int[0])
        self.msd_sampled_std = np.append(self.msd_sampled_std, np.std(distro.samples))
    # NOTE(review): both the rows and the columns of `samples` are sliced by
    # argmax(ngp), although the columns are indexed by `i` and the rows are
    # not -- confirm the row slice is intended. The nesting of this statement
    # (after the loop) is inferred from the flattened source.
    self.correlation_matrix = np.array(pd.DataFrame(samples[np.argmax(self.ngp):, np.argmax(self.ngp):]).corr())
def test_correlation_matrix(self):
    """
    Test the correlation_matrix function on a fitted straight-line relationship.
    """
    points = np.arange(1, 9, 1)
    ordinate = [
        Distribution(scipy.stats.norm.rvs(loc=centre, scale=0.5, size=200))
        for centre in np.arange(1, 9, 1)
    ]
    test_rel = Relationship(utils.straight_line, points, ordinate)
    test_rel.max_likelihood('mini')
    test_rel.mcmc(n_burn=10, n_samples=10)
    actual_matrix = utils.correlation_matrix(test_rel)
    assert_equal(actual_matrix.shape, (2, 2))
    # The matrix must be symmetric with a unit diagonal.
    assert_almost_equal(actual_matrix[1, 0], actual_matrix[0, 1])
    assert_almost_equal(actual_matrix[0, 0], 1.0)
    assert_almost_equal(actual_matrix[1, 1], 1.0)
    assert_equal(test_rel.mcmc_done, True)
def nested_sampling(relationship, prior_function=None, progress=True, dynamic=False, **kwargs):
    """
    Run nested sampling, or dynamic nested sampling, to determine the Bayesian natural log evidence. Extra keyword arguments are passed to :py:func:`dynesty.NestedSampler.run_nested()`.

    Args:
        relationship (:py:class:`~uravu.relationship.Relationship`): The relationship to estimate the evidence for.
        prior_function (:py:attr:`callable`, optional): Callable that returns the prior distributions. Default is the broad uniform priors in :func:`~uravu.relationship.Relationship.prior()`.
        progress (:py:attr:`bool`, optional): Show :py:mod:`tqdm` progress for sampling. Default is :py:attr:`True`.
        dynamic (:py:attr:`bool`, optional): Should dynamic nested sampling be used? Default is :py:attr:`False`.

    Returns:
        :py:attr:`dict`: The results from :py:func:`dynesty.NestedSampler.run_nested()`, with an added :py:attr:`'distributions'` entry holding one distribution per variable.
    """
    if prior_function is None:
        prior_function = relationship.prior
    priors = prior_function()

    sampler_class = (
        dynesty.DynamicNestedSampler if dynamic else dynesty.NestedSampler)
    sampler = sampler_class(
        optimize.ln_likelihood,
        nested_prior,
        len(relationship.variables),
        logl_args=[
            relationship.function,
            relationship.abscissa,
            relationship.ordinate,
        ],
        ptform_args=[priors])
    sampler.run_nested(print_progress=progress, **kwargs)

    results = sampler.results
    # Weight each sample relative to the final evidence estimate, then
    # resample to equally weighted draws.
    weights = np.exp(results['logwt'] - results['logz'][-1])
    new_samples = dyfunc.resample_equal(results['samples'], weights)
    results['distributions'] = [
        Distribution(new_samples[:, column])
        for column in range(new_samples.shape[1])
    ]
    return results
def test_size(self):
    """
    Test that the size matches the number of samples supplied.
    """
    samples = uniform.rvs(loc=0, scale=1, size=100)
    assert_equal(Distribution(samples).size, 100)
"""

# Copyright (c) Andrew R. McCluskey
# Distributed under the terms of the MIT License
# author: Andrew R. McCluskey

import unittest
import numpy as np
from numpy.testing import assert_almost_equal, assert_equal
from uravu.distribution import Distribution
import scipy.stats
from uravu.axis import Axis
from scipy.stats import norm, uniform, gaussian_kde

# Module-level fixtures shared by the tests: two distributions of 10000
# normal samples drawn with fixed seeds (1 and 2), an Axis built from both,
# and an Axis built from plain numbers.
DISTRO1 = Distribution(
    norm.rvs(loc=0, scale=1, size=10000,
             random_state=np.random.RandomState(1)))
DISTRO2 = Distribution(
    norm.rvs(loc=1, scale=1, size=10000,
             random_state=np.random.RandomState(2)))
AX = Axis([DISTRO1, DISTRO2])
AX_ARRAY = Axis([0, 1])


class TestDistribution(unittest.TestCase):
    """
    Testing the Axis class.
    """
    def test_init_values(self):
        """
        Test that the first value of the Axis holds the samples of DISTRO1.
        """
        assert_equal(AX.values[0].samples, DISTRO1.samples)
# author: Andrew R. McCluskey import unittest import numpy as np import scipy.stats from uncertainties import unumpy as unp from numpy.testing import assert_almost_equal, assert_equal from uravu import utils from uravu.relationship import Relationship from uravu.distribution import Distribution from uravu.axis import Axis TEST_Y = [] for i in np.arange(1, 9, 1): TEST_Y.append( Distribution(scipy.stats.norm.rvs(loc=i, scale=0.5, size=200))) TEST_X = np.arange(1, 9, 1) class TestRelationship(unittest.TestCase): """ Tests for the relationship module and class. """ def test_function_init(self): r = Relationship(utils.straight_line, TEST_X, TEST_Y) assert_equal(r.function, utils.straight_line) def test_abscissa_init(self): r = Relationship(utils.straight_line, TEST_X, TEST_Y) assert_equal(isinstance(r.abscissa, np.ndarray), True) assert_equal(r.abscissa, TEST_X)
def test_max(self):
    """
    Test that the maximum of evenly spaced samples from 1 to 10 is 10.
    """
    samples = np.linspace(1, 10, 100)
    assert_equal(Distribution(samples).max, 10)
def test_init_ci_points_error(self):
    """
    Test that passing three confidence-interval points raises a ValueError.
    """
    samples = norm.rvs(loc=0, scale=1, size=1000)
    with self.assertRaises(ValueError):
        Distribution(samples, ci_points=[5, 50, 95])