Beispiel #1
0
    def test_fit_only_loc_scale(self):
        """Fit `stats.norm` with bounds on `loc` only, `scale` only, then both.

        In every case the fitted parameters are compared with the values the
        sample was drawn with; a parameter that is not passed to `rvs` is
        expected at 0 (`loc`) or 1 (`scale`).
        """
        n_samples = 5000
        rng = np.random.default_rng(self.seed)
        norm = stats.norm
        loc_bounds = (0, 5)
        scale_bounds = (0, 5)

        # Each case: (keyword arguments used to draw the sample,
        #             bounds handed to `fit`, expected (loc, scale)).
        # The cases share one generator, so their order is significant.
        cases = [
            # fit only loc
            ({'loc': 1.5}, {'loc': loc_bounds}, (1.5, 1)),
            # fit only scale
            ({'scale': 2.5}, {'scale': scale_bounds}, (0, 2.5)),
            # fit only loc and scale
            ({'loc': 1.5, 'scale': 2.5},
             {'loc': loc_bounds, 'scale': scale_bounds},
             (1.5, 2.5)),
        ]
        for rvs_kwargs, bounds, expected in cases:
            sample = norm.rvs(size=n_samples, random_state=rng, **rvs_kwargs)
            fitted = stats.fit(norm, sample, bounds, optimizer=self.opt)
            assert_allclose(fitted.params, expected, **self.tols)
Beispiel #2
0
    def test_basic_fit(self, dist_name):
        """Fit `dist_name` to a sample drawn from it and recover the parameters.

        The reference shapes are looked up in the `distcont`/`distdiscrete`
        tables; `loc` and `scale` are drawn uniformly from their bounds.
        Distributions exposing `pmf` are fitted without `scale` and with an
        integer-valued `loc`.

        Parameters
        ----------
        dist_name : str
            Name of the attribute of `stats` holding the distribution.
        """
        N = 5000
        dist_data = dict(distcont + distdiscrete)
        rng = np.random.default_rng(self.seed)
        dist = getattr(stats, dist_name)
        shapes = np.array(dist_data[dist_name])
        bounds = np.empty((len(shapes) + 2, 2), dtype=np.float64)
        # Bracket each shape by a factor of ten on either side.  The
        # sign-aware factor keeps lower <= upper for negative shapes as well;
        # plain `shapes / 10` and `shapes * 10` would invert the interval
        # there.  The float base avoids integer-to-negative-power errors.
        factor = 10.0**np.sign(shapes)
        bounds[:-2, 0] = shapes / factor
        bounds[:-2, 1] = shapes * factor
        bounds[-2] = (0, 10)
        bounds[-1] = (0, 10)
        loc = rng.uniform(*bounds[-2])
        scale = rng.uniform(*bounds[-1])
        ref = list(dist_data[dist_name]) + [loc, scale]

        if hasattr(dist, 'pmf'):
            # Drop `scale` and use an integer `loc` for these distributions.
            ref = ref[:-1]
            ref[-1] = np.floor(loc)
            fit_bounds = bounds[:-1]
        else:
            fit_bounds = bounds

        # A single sample/fit path guarantees `res` is always bound.
        data = dist.rvs(*ref, size=N, random_state=rng)
        res = stats.fit(dist, data, fit_bounds, optimizer=self.opt)

        assert_allclose(res.params, ref, **self.tols)
Beispiel #3
0
    def test_dist_iv(self):
        """Invalid `dist` arguments to `stats.fit` raise `ValueError`."""
        # (pattern the error text must match, positional arguments for `fit`)
        invalid_calls = [
            ("`dist` must be an instance of...",
             (10, self.data, self.shape_bounds_a)),
            ("Distribution `laplace` is not yet supported by...",
             (stats.laplace, self.data)),
        ]
        for expected, fit_args in invalid_calls:
            with pytest.raises(ValueError, match=expected):
                stats.fit(*fit_args)
Beispiel #4
0
    def test_guess(self):
        """A `guess` lets the fit reach parameters it otherwise misses."""
        # Test that guess helps DE find the desired solution
        n_samples = 2000
        rng = np.random.default_rng(self.seed)
        nhypergeom = stats.nhypergeom
        true_params = (20, 7, 12, 0)
        search_bounds = [(2, 200), (0.7, 70), (1.2, 120), (0, 10)]
        sample = nhypergeom.rvs(*true_params, size=n_samples,
                                random_state=rng)

        # Without a guess the result is expected to differ from the truth.
        unguided = stats.fit(nhypergeom, sample, search_bounds,
                             optimizer=self.opt)
        assert not np.allclose(unguided.params, true_params, **self.tols)

        # Seeding the fit with the true parameters should recover them.
        guided = stats.fit(nhypergeom, sample, search_bounds,
                           guess=true_params, optimizer=self.opt)
        assert_allclose(guided.params, true_params, **self.tols)
Beispiel #5
0
    def test_basic_fit(self, dist_name):
        """Fit `dist_name` to its own sample; compare against the reference.

        Reference shapes are looked up in the `distcont`/`distdiscrete`
        tables and bracketed by a sign-aware factor of ten; `loc` and `scale`
        are drawn uniformly from (0, 10).  The result is checked with
        `assert_nllf_less_or_close` against the reference parameters.
        """
        n_samples = 5000
        reference_shapes = dict(distcont + distdiscrete)[dist_name]
        rng = np.random.default_rng(self.seed)
        dist = getattr(stats, dist_name)
        shapes = np.array(reference_shapes)
        n_shapes = len(shapes)

        # One row per shape, followed by a row for loc and a row for scale.
        bounds = np.empty((n_shapes + 2, 2), dtype=np.float64)
        bounds[:n_shapes, 0] = shapes / 10**np.sign(shapes)
        bounds[:n_shapes, 1] = shapes * 10**np.sign(shapes)
        bounds[n_shapes:] = (0, 10)
        loc_bounds, scale_bounds = bounds[-2], bounds[-1]
        loc = rng.uniform(*loc_bounds)
        scale = rng.uniform(*scale_bounds)
        ref = list(reference_shapes) + [loc, scale]

        if getattr(dist, 'pmf', False):
            # Distributions with a pmf: no scale, integer-valued loc.
            ref = ref[:-2] + [np.floor(loc)]
            data = dist.rvs(*ref, size=n_samples, random_state=rng)
            bounds = bounds[:-1]
        if getattr(dist, 'pdf', False):
            data = dist.rvs(*ref, size=n_samples, random_state=rng)

        with npt.suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "overflow encountered")
            fitted = stats.fit(dist, data, bounds, optimizer=self.opt)

        assert_nllf_less_or_close(dist, data, fitted.params, ref,
                                  **self.tols)
Beispiel #6
0
 def test_nchypergeom_wallenius(self):
     """Fit `stats.nchypergeom_wallenius` and check the fitted shapes."""
     # The NC hypergeometric distributions are more challenging
     sample_size = 5000
     rng = np.random.default_rng(self.seed)
     wallenius = stats.nchypergeom_wallenius
     true_shapes = (14, 8, 6, 0.5)
     sample = wallenius.rvs(*true_shapes, size=sample_size,
                            random_state=rng)
     search_bounds = [(0, 20), (0, 10), (0, 10), (0, 0.5)]
     fitted = stats.fit(wallenius, sample, search_bounds,
                        optimizer=self.opt)
     # Only the leading entries are compared; the last one is ignored.
     assert_allclose(fitted.params[:-1], true_shapes, **self.tols)
Beispiel #7
0
 def test_yulesimon(self):
     """Fit `stats.yulesimon` and compare with the sampling parameters."""
     # yulesimon fit is not very sensitive to alpha except for small alpha
     sample_size = 5000
     rng = np.random.default_rng(self.seed)
     yulesimon = stats.yulesimon
     true_params = (1.5, 4)
     sample = yulesimon.rvs(*true_params, size=sample_size,
                            random_state=rng)
     search_bounds = [(0.15, 15), (0, 10)]
     fitted = stats.fit(yulesimon, sample, search_bounds,
                        optimizer=self.opt)
     assert_allclose(fitted.params, true_params, **self.tols)
Beispiel #8
0
 def test_randint(self):
     """Fit `stats.randint` and check the first two fitted parameters."""
     # randint is overparameterized; test_basic_fit finds equally good fit
     sample_size = 5000
     rng = np.random.default_rng(self.seed)
     randint = stats.randint
     true_shapes = (7, 31)
     sample = randint.rvs(*true_shapes, size=sample_size, random_state=rng)
     search_bounds = [(0, 70), (0, 310)]
     fitted = stats.fit(randint, sample, search_bounds, optimizer=self.opt)
     assert_allclose(fitted.params[:2], true_shapes, **self.tols)
Beispiel #9
0
 def test_nbinom(self):
     """Fit `stats.nbinom` with shape bounds only and check the shapes."""
     # Fitting nbinom doesn't always get original shapes if loc is free
     sample_size = 7000
     rng = np.random.default_rng(self.seed)
     nbinom = stats.nbinom
     true_shapes = (5, 0.5)
     sample = nbinom.rvs(*true_shapes, size=sample_size, random_state=rng)
     search_bounds = [(0.5, 50), (0.05, 5)]
     fitted = stats.fit(nbinom, sample, search_bounds, optimizer=self.opt)
     # Only the leading entries are compared; the last one is ignored.
     assert_allclose(fitted.params[:-1], true_shapes, **self.tols)
Beispiel #10
0
 def test_nhypergeom(self):
     """Fit `stats.nhypergeom` within (0, 30) bounds on each shape."""
     # DE doesn't find optimum for the bounds in `test_basic_fit`. NBD.
     sample_size = 2000
     rng = np.random.default_rng(self.seed)
     nhypergeom = stats.nhypergeom
     true_shapes = (20, 7, 12)
     sample = nhypergeom.rvs(*true_shapes, size=sample_size,
                             random_state=rng)
     search_bounds = [(0, 30) for _ in true_shapes]
     fitted = stats.fit(nhypergeom, sample, search_bounds,
                        optimizer=self.opt)
     # Only the leading entries are compared; the last one is ignored.
     assert_allclose(fitted.params[:-1], true_shapes, **self.tols)
Beispiel #11
0
 def test_hypergeom(self):
     """Fit `stats.hypergeom` within (0, 30) bounds on each shape."""
     # hypergeometric distribution (M, n, N) \equiv (M, N, n)
     sample_size = 1000
     rng = np.random.default_rng(self.seed)
     hypergeom = stats.hypergeom
     true_shapes = (20, 7, 12)
     sample = hypergeom.rvs(*true_shapes, size=sample_size,
                            random_state=rng)
     search_bounds = [(0, 30) for _ in true_shapes]
     fitted = stats.fit(hypergeom, sample, search_bounds,
                        optimizer=self.opt)
     # Only the leading entries are compared; the last one is ignored.
     assert_allclose(fitted.params[:-1], true_shapes, **self.tols)
Beispiel #12
0
    def test_data_iv(self):
        """Malformed `data` passed to `stats.fit` raises `ValueError`."""
        not_1d = "`data` must be exactly one-dimensional."
        not_finite = "All elements of `data` must be finite numbers."
        # (pattern the error text must match, offending `data` argument)
        invalid_inputs = [
            (not_1d, [[1, 2, 3]]),
            (not_finite, [1, 2, 3, np.nan]),
            (not_finite, [1, 2, 3, np.inf]),
            (not_finite, ['1', '2', '3']),
        ]
        for expected, bad_data in invalid_inputs:
            with pytest.raises(ValueError, match=expected):
                stats.fit(self.dist, bad_data, self.shape_bounds_a)
Beispiel #13
0
 def test_boltzmann(self):
     """Fit `stats.boltzmann`; check the first and third fitted values."""
     # Boltzmann distribution shape is very insensitive to parameter N
     sample_size = 1000
     rng = np.random.default_rng(self.seed)
     boltzmann = stats.boltzmann
     true_params = (1.4, 19, 4)
     sample = boltzmann.rvs(*true_params, size=sample_size,
                            random_state=rng)
     search_bounds = [(0, 30), (0, 30), (0, 10)]
     fitted = stats.fit(boltzmann, sample, search_bounds,
                        optimizer=self.opt)
     # The middle parameter is deliberately left unchecked.
     assert_allclose(fitted.params[0], true_params[0], **self.tols)
     assert_allclose(fitted.params[2], true_params[2], **self.tols)
Beispiel #14
0
    def test_missing_shape_bounds(self):
        """`stats.fit` works when bounds for some parameters are omitted."""
        # some distributions have a small domain w.r.t. a parameter, e.g.
        # $p \in [0, 1]$ for binomial distribution
        # User does not need to provide these because the intersection of the
        # user's bounds (none) and the distribution's domain is finite
        sample_size = 1000
        rng = np.random.default_rng(self.seed)

        # binom: bounds are given for `n` only.
        binom = stats.binom
        true_n, true_p, true_loc = 10, 0.65, 0
        sample = binom.rvs(true_n, true_p, loc=true_loc, size=sample_size,
                           random_state=rng)
        n_only = {'n': np.array([0, 20])}  # check arrays are OK, too
        fitted = stats.fit(binom, sample, n_only, optimizer=self.opt)
        assert_allclose(fitted.params, (true_n, true_p, true_loc),
                        **self.tols)

        # bernoulli: no bounds are given at all.
        bernoulli = stats.bernoulli
        true_p, true_loc = 0.314159, 0
        sample = bernoulli.rvs(true_p, loc=true_loc, size=sample_size,
                               random_state=rng)
        fitted = stats.fit(bernoulli, sample, optimizer=self.opt)
        assert_allclose(fitted.params, (true_p, true_loc), **self.tols)
Beispiel #15
0
    def test_truncweibull_min(self):
        """Fit `stats.truncweibull_min` with (0.1, 10) bounds throughout."""
        # Can't guarantee that all distributions will fit all data with
        # arbitrary bounds. This distribution just happens to fail above.
        # Try something slightly different.
        sample_size = 1000
        rng = np.random.default_rng(self.seed)
        truncweibull = stats.truncweibull_min
        true_params = (2.5, 0.25, 1.75, 2., 3.)
        sample = truncweibull.rvs(*true_params, size=sample_size,
                                  random_state=rng)
        # One identical interval per parameter.
        search_bounds = [(0.1, 10) for _ in true_params]
        fitted = stats.fit(truncweibull, sample, search_bounds,
                           optimizer=self.opt)

        assert_nllf_less_or_close(truncweibull, sample, fitted.params,
                                  true_params, **self.tols)
Beispiel #16
0
    def test_foldnorm(self):
        """Fit `stats.foldnorm` with (0.1, 10) bounds on c, loc and scale."""
        # Can't guarantee that all distributions will fit all data with
        # arbitrary bounds. This distribution just happens to fail above.
        # Try something slightly different.
        sample_size = 1000
        rng = np.random.default_rng(self.seed)
        foldnorm = stats.foldnorm
        true_params = (1.952125337355587, 2., 3.)
        sample = foldnorm.rvs(*true_params, size=sample_size,
                              random_state=rng)
        # The same interval is used for every named parameter.
        search_bounds = dict.fromkeys(('c', 'loc', 'scale'), (0.1, 10))
        fitted = stats.fit(foldnorm, sample, search_bounds,
                           optimizer=self.opt)

        assert_nllf_less_or_close(foldnorm, sample, fitted.params,
                                  true_params, **self.tols)
Beispiel #17
0
    def test_failure(self):
        """An infeasible `loc` bound makes `stats.fit` report failure."""
        sample_size = 5000
        rng = np.random.default_rng(self.seed)

        nbinom = stats.nbinom
        true_shapes = (5, 0.5)
        sample = nbinom.rvs(*true_shapes, size=sample_size, random_state=rng)

        # The scenario relies on the sample containing a zero.
        assert sample.min() == 0
        # With lower bounds on location at 0.5, likelihood is zero
        infeasible_bounds = [(0, 30), (0, 1), (0.5, 10)]
        outcome = stats.fit(nbinom, sample, infeasible_bounds)
        expected_prefix = "Optimization converged to parameter values that are"
        assert outcome.message.startswith(expected_prefix)
        assert outcome.success is False
Beispiel #18
0
    def test_everything_fixed(self):
        """`stats.fit` returns the fixed values when nothing is left free."""
        sample_size = 5000
        rng = np.random.default_rng(self.seed)

        norm = stats.norm
        true_loc, true_scale = 1.5, 2.5
        sample = norm.rvs(loc=true_loc, scale=true_scale, size=sample_size,
                          random_state=rng)

        # loc, scale fixed to 0, 1 by default
        fitted = stats.fit(norm, sample)
        assert_allclose(fitted.params, (0, 1), **self.tols)

        # loc, scale explicitly fixed
        pinned = {'loc': (true_loc, true_loc),
                  'scale': (true_scale, true_scale)}
        fitted = stats.fit(norm, sample, pinned)
        assert_allclose(fitted.params, (true_loc, true_scale), **self.tols)

        # `n` gets fixed during polishing
        binom = stats.binom
        true_n, true_p, true_loc = 10, 0.65, 0
        sample = binom.rvs(true_n, true_p, loc=true_loc, size=sample_size,
                           random_state=rng)
        binom_bounds = {'n': (0, 20), 'p': (0.65, 0.65)}
        fitted = stats.fit(binom, sample, binom_bounds, optimizer=self.opt)
        assert_allclose(fitted.params, (true_n, true_p, true_loc),
                        **self.tols)
Beispiel #19
0
 def test_dist_iv(self):
     """Passing a non-distribution as `dist` raises `ValueError`."""
     expected = "`dist` must be an instance of..."
     not_a_distribution = 10
     with pytest.raises(ValueError, match=expected):
         stats.fit(not_a_distribution, self.data, self.shape_bounds_a)
Beispiel #20
0
    def test_guess_iv(self):
        """Invalid or adjusted `guess` values raise errors or warn."""
        not_scalar = "Each element of `guess` must be a scalar..."
        # Each case: (pytest context factory, expected category,
        #             pattern the message must match, `guess` argument).
        # Cases run in the listed order.
        cases = [
            (pytest.warns, RuntimeWarning,
             "Guesses provided for the following unrecognized...",
             {'n': 1, 'p': 0.5, '1': 255}),
            (pytest.raises, ValueError, not_scalar, {'n': 1, 'p': 'hi'}),
            (pytest.raises, ValueError, not_scalar, [1, 'f']),
            (pytest.raises, ValueError, not_scalar, [[1, 2]]),
            (pytest.raises, ValueError,
             "A `guess` sequence must contain at least 2...",
             [1]),
            (pytest.raises, ValueError,
             "A `guess` sequence may not contain more than 3...",
             [1, 2, 3, 4]),
            (pytest.warns, RuntimeWarning,
             "Guess for parameter `n` rounded...",
             {'n': 4.5, 'p': -0.5}),
            (pytest.warns, RuntimeWarning,
             "Guess for parameter `loc` rounded...",
             [5, 0.5, 0.5]),
            (pytest.warns, RuntimeWarning,
             "Guess for parameter `p` clipped...",
             {'n': 5, 'p': -0.5}),
            (pytest.warns, RuntimeWarning,
             "Guess for parameter `loc` clipped...",
             [5, 0.5, 1]),
        ]
        for expecting, category, pattern, guess in cases:
            with expecting(category, match=pattern):
                stats.fit(self.dist, self.data, self.shape_bounds_d,
                          guess=guess)
def fit_dist_plot(samples, stats):
    """Plot `samples` with a fitted curve and annotate the fitted values.

    Parameters
    ----------
    samples : sequence of numbers
        Observations to plot and fit.
    stats : object
        Passed to `seaborn.distplot` as `fit`; its `fit(samples)` method must
        return exactly two values, which are displayed as μ and σ.
        Presumably a two-parameter distribution such as `scipy.stats.norm`
        (confirm against callers).
    """
    # NOTE(review): `seaborn.distplot` is deprecated in recent seaborn
    # releases; kept here to preserve the existing plot.
    seaborn.set(color_codes=True)
    axes = seaborn.distplot(samples, fit=stats)
    mu, sigma = stats.fit(samples)
    # "%d" would truncate the fitted floats to integers (0.37 -> "0"), so
    # format them with two decimals instead.
    label = "μ=%.2f, σ=%.2f" % (mu, sigma)
    axes.text(0.75, 0.9, label, transform=axes.transAxes)
Beispiel #22
0
    def test_bounds_iv(self):
        """Invalid `bounds` passed to `stats.fit` raise errors or warn."""
        bad_element = "Each element of `bounds` must be a tuple specifying..."
        no_finite_n = "The intersection of user-provided bounds for `n`"
        # Each case: (pytest context factory, expected category,
        #             pattern the message must match,
        #             extra positional arguments after `dist` and `data`).
        # Cases run in the listed order; an empty tuple means no bounds.
        cases = [
            (pytest.warns, RuntimeWarning,
             "Bounds provided for the following unrecognized...",
             ({'n': (1, 10), 'p': (0, 1), '1': (0, 10)},)),
            (pytest.raises, ValueError,
             "Each element of a `bounds` sequence must be a tuple...",
             ([(1, 10, 3), (0, 1)],)),
            (pytest.raises, ValueError, bad_element,
             ([(1, 10, 3), (0, 1, 0.5)],)),
            (pytest.raises, ValueError, bad_element,
             ([1, 0],)),
            (pytest.raises, ValueError,
             "A `bounds` sequence must contain at least 2 elements...",
             ([(1, 10)],)),
            (pytest.raises, ValueError,
             "A `bounds` sequence may not contain more than 3 elements...",
             ([(1, 10), (1, 10), (1, 10), (1, 10)],)),
            (pytest.raises, ValueError,
             "There are no values for `p` on the interval...",
             ({'n': (1, 10), 'p': (1, 0)},)),
            (pytest.raises, ValueError,
             "There are no values for `n` on the interval...",
             ([(10, 1), (0, 1)],)),
            (pytest.raises, ValueError,
             "There are no integer values for `n` on the interval...",
             ([(1.4, 1.6), (0, 1)],)),
            (pytest.raises, ValueError, no_finite_n,
             ()),
            (pytest.raises, ValueError, no_finite_n,
             ([(-np.inf, np.inf), (0, 1)],)),
        ]
        for expecting, category, pattern, bounds_args in cases:
            with expecting(category, match=pattern):
                stats.fit(self.dist, self.data, *bounds_args)