Example 1
def test_likelihood_fitting_gaussian():
    """
    Fit using the likelihood method.
    """
    mu, sig = parameters('mu, sig')
    sig.min = 0.01
    sig.value = 3.0
    mu.value = 50.
    x = Variable('x')
    pdf = GradientModel(Gaussian(x, mu, sig))

    np.random.seed(10)
    # TODO: Do we really need 10k points?
    xdata = np.random.normal(51., 3.5, 10000)

    # Expected parameter values
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev / np.sqrt(len(xdata))

    fit = Fit(pdf, xdata, objective=LogLikelihood)
    fit_result = fit.execute()

    assert fit_result.value(mu) == pytest.approx(mean, 1e-6)
    assert fit_result.stdev(mu) == pytest.approx(mean_stdev, 1e-3)
    assert fit_result.value(sig) == pytest.approx(np.std(xdata), 1e-6)
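
The asserted targets are the closed-form maximum-likelihood estimates for a Gaussian: the sample mean for mu, the biased (ddof=0) sample standard deviation for sig, and stdev/sqrt(N) for the standard error of the mean. A minimal numpy-only sketch of those targets (using numpy's newer Generator API rather than the test's legacy seeding):

import numpy as np

rng = np.random.default_rng(10)
sample = rng.normal(51.0, 3.5, 10000)

mu_hat = sample.mean()                   # MLE of the mean
sig_hat = sample.std(ddof=0)             # MLE of sigma (biased estimator)
mu_err = sig_hat / np.sqrt(sample.size)  # standard error of the mean
print(mu_hat, sig_hat, mu_err)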
Example 2
def test_gaussian_fitting():
    """
    Tests fitting to a gaussian function and fit_result.params unpacking.
    """
    xdata = 2 * np.random.rand(10000) - 1  # random between [-1, 1]
    ydata = 5.0 * scipy.stats.norm.pdf(xdata, loc=0.0, scale=1.0)

    x0 = Parameter('x0')
    sig = Parameter('sig')
    A = Parameter('A')
    x = Variable('x')
    g = GradientModel(A * Gaussian(x, x0, sig))

    fit = Fit(g, xdata, ydata)
    assert isinstance(fit.objective, LeastSquares)
    fit_result = fit.execute()

    assert fit_result.value(A) == pytest.approx(5.0)
    assert np.abs(fit_result.value(sig)) == pytest.approx(1.0)
    assert fit_result.value(x0) == pytest.approx(0.0)
    sexy = g(x=2.0, **fit_result.params)
    ugly = g(
        x=2.0,
        x0=fit_result.value(x0),
        A=fit_result.value(A),
        sig=fit_result.value(sig),
    )
    assert sexy == ugly
Example 3
def test_simple_kinetics():
    """
    Simple kinetics data to test fitting
    """
    tdata = np.array([10, 26, 44, 70, 120])
    adata = 10e-4 * np.array([44, 34, 27, 20, 14])
    a, b, t = variables('a, b, t')
    k, a0 = parameters('k, a0')
    k.value = 0.01
    # a0.value, a0.min, a0.max = 54 * 10e-4, 40e-4, 60e-4
    a0 = 54 * 10e-4  # Fix a0 as a known constant instead of fitting it.

    model_dict = {
        D(a, t): - k * a**2,
        D(b, t): k * a**2,
    }

    ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, b: 0.0})

    # Analytical solution
    model = GradientModel({a: 1 / (k * t + 1 / a0)})
    fit = Fit(model, t=tdata, a=adata)
    fit_result = fit.execute()

    fit = Fit(ode_model, t=tdata, a=adata, b=None, minimizer=MINPACK)
    ode_result = fit.execute()
    assert ode_result.value(k) == pytest.approx(fit_result.value(k), 1e-4)
    assert ode_result.stdev(k) == pytest.approx(fit_result.stdev(k), 1e-4)
    assert ode_result.r_squared == pytest.approx(fit_result.r_squared, 1e-4)

    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    ode_result = fit.execute()
    assert ode_result.value(k) == pytest.approx(fit_result.value(k), 1e-4)
    assert ode_result.stdev(k) == pytest.approx(fit_result.stdev(k), 1e-4)
    assert ode_result.r_squared == pytest.approx(fit_result.r_squared, 1e-4)
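
The analytical model above is the textbook solution of the second-order decay ODE. A quick sympy verification sketch (not part of the test suite):

import sympy as sp

t, k, a0 = sp.symbols('t k a0', positive=True)
a = 1 / (k * t + 1 / a0)

# da/dt equals -k*a**2 and a(0) equals a0, matching the ODEModel above.
assert sp.simplify(sp.diff(a, t) + k * a**2) == 0
assert a.subs(t, 0) == a0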
Example 4
def test_2_gaussian_2d_fitting():
    """
    Tests fitting to a scalar gaussian with 2 independent variables with
    tight bounds.
    """
    mean = (0.3, 0.4)  # x, y mean 0.3, 0.4
    cov = [[0.01**2, 0], [0, 0.01**2]]
    # TODO: evaluate gaussian at 100x100 points and add appropriate noise
    data = np.random.multivariate_normal(mean, cov, 3000000)
    mean = (0.7, 0.8)  # x, y mean 0.7, 0.8
    cov = [[0.01**2, 0], [0, 0.01**2]]
    data_2 = np.random.multivariate_normal(mean, cov, 3000000)
    data = np.vstack((data, data_2))

    # Insert them as y,x here, since np.histogram2d does not follow
    # Cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 1],
                                           data[:, 0],
                                           bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False)

    x = Variable('x')
    y = Variable('y')

    x0_1 = Parameter('x0_1', value=0.7, min=0.6, max=0.9)
    sig_x_1 = Parameter('sig_x_1', value=0.1, min=0.0, max=0.2)
    y0_1 = Parameter('y0_1', value=0.8, min=0.6, max=0.9)
    sig_y_1 = Parameter('sig_y_1', value=0.1, min=0.0, max=0.2)
    A_1 = Parameter('A_1')
    g_1 = A_1 * Gaussian(x, x0_1, sig_x_1) * Gaussian(y, y0_1, sig_y_1)

    x0_2 = Parameter('x0_2', value=0.3, min=0.2, max=0.5)
    sig_x_2 = Parameter('sig_x_2', value=0.1, min=0.0, max=0.2)
    y0_2 = Parameter('y0_2', value=0.4, min=0.2, max=0.5)
    sig_y_2 = Parameter('sig_y_2', value=0.1, min=0.0, max=0.2)
    A_2 = Parameter('A_2')
    g_2 = A_2 * Gaussian(x, x0_2, sig_x_2) * Gaussian(y, y0_2, sig_y_2)

    model = GradientModel(g_1 + g_2)
    fit = Fit(model, xx, yy, ydata)
    fit_result = fit.execute()

    assert isinstance(fit.minimizer, LBFGSB)

    img = model(x=xx, y=yy, **fit_result.params)[0]
    img_g_1 = g_1(x=xx, y=yy, **fit_result.params)
    img_g_2 = g_2(x=xx, y=yy, **fit_result.params)
    assert img == pytest.approx(img_g_1 + img_g_2)

    # Equal up to some precision; not a very tight tolerance, obviously.
    assert fit_result.value(x0_1) == pytest.approx(0.7, 1e-3)
    assert fit_result.value(y0_1) == pytest.approx(0.8, 1e-3)
    assert fit_result.value(x0_2) == pytest.approx(0.3, 1e-3)
    assert fit_result.value(y0_2) == pytest.approx(0.4, 1e-3)
Example 5
def test_gaussian_2d_fitting():
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables. Very sensitive to initial guesses: if they are chosen too
    restrictively, Fit fails outright. It therefore appears to be more
    sensitive than NumericalLeastSquares.
    """
    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
    cov = [[0.2**2, 0], [0, 0.1**2]]

    np.random.seed(0)
    data = np.random.multivariate_normal(mean, cov, 100000)

    # Insert them as y,x here, since np.histogram2d does not follow
    # Cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 0],
                                           data[:, 1],
                                           bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter(value=mean[0], min=0.0, max=1.0)
    sig_x = Parameter(value=0.2, min=0.0, max=0.3)
    y0 = Parameter(value=mean[1], min=0.0, max=1.0)
    sig_y = Parameter(value=0.1, min=0.0, max=0.3)
    A = Parameter(value=np.mean(ydata), min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')
    model = GradientModel(
        {g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    assert fit_result.value(x0) == pytest.approx(np.mean(data[:, 0]), 1e-3)
    assert fit_result.value(y0) == pytest.approx(np.mean(data[:, 1]), 1e-3)
    assert np.abs(fit_result.value(sig_x)) == pytest.approx(
        np.std(data[:, 0]), 1e-2)
    assert np.abs(fit_result.value(sig_y)) == pytest.approx(
        np.std(data[:, 1]), 1e-2)
    assert fit_result.r_squared >= 0.96

    # Compare with industry standard MINPACK
    fit_std = Fit(model, x=xx, y=yy, g=ydata, minimizer=MINPACK)
    fit_std_result = fit_std.execute()

    assert fit_std_result.value(x0) == pytest.approx(fit_result.value(x0),
                                                     1e-4)
    assert fit_std_result.value(y0) == pytest.approx(fit_result.value(y0),
                                                     1e-4)
    assert fit_std_result.value(sig_x) == pytest.approx(
        fit_result.value(sig_x), 1e-4)
    assert fit_std_result.value(sig_y) == pytest.approx(
        fit_result.value(sig_y), 1e-4)
    assert fit_std_result.r_squared == pytest.approx(fit_result.r_squared,
                                                     1e-4)
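
The y,x bookkeeping in these 2D examples follows from np.histogram2d's axis convention: the first axis of the returned histogram tracks the first argument, and meshgrid(..., indexing='ij') produces grids in that same order. A small standalone demonstration, with unequal bin counts so the layout is visible:

import numpy as np

rng = np.random.default_rng(0)
pts = rng.random((1000, 2))

# The first axis of H follows the first argument of histogram2d.
H, xedges, yedges = np.histogram2d(pts[:, 0], pts[:, 1], bins=(4, 6))
assert H.shape == (4, 6)  # H[i, j] counts x-bin i, y-bin j

# indexing='ij' yields grids with the same (nx, ny) layout as H, so a
# model evaluated as model(x=xx, y=yy) lines up with the histogram.
xc = (xedges[:-1] + xedges[1:]) / 2
yc = (yedges[:-1] + yedges[1:]) / 2
xx, yy = np.meshgrid(xc, yc, indexing='ij')
assert xx.shape == H.shape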
Example 6
def test_gaussian_2d_fitting_background():
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables to data with a background. Added after #149.
    """
    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
    cov = [[0.2**2, 0], [0, 0.1**2]]
    background = 3.0

    # TODO: Since we bin this data into 100 bins later on, just evaluate 100
    #       points on a Gaussian, and add an appropriate amount of noise. This
    #       burns CPU cycles without good reason.
    data = np.random.multivariate_normal(mean, cov, 500000)
    # Insert them as y,x here, since np.histogram2d does not follow
    # Cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 0],
                                           data[:, 1],
                                           bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2
    ydata += background  # Background

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter('x0', value=1.1 * mean[0], min=0.0, max=1.0)
    sig_x = Parameter('sig_x', value=1.1 * 0.2, min=0.0, max=0.3)
    y0 = Parameter('y0', value=1.1 * mean[1], min=0.0, max=1.0)
    sig_y = Parameter('sig_y', value=1.1 * 0.1, min=0.0, max=0.3)
    A = Parameter('A', value=1.1 * np.mean(ydata), min=0.0)
    b = Parameter('b', value=1.2 * background, min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')

    model = GradientModel(
        {g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y) + b})

    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    assert fit_result.value(x0) / np.mean(data[:, 0]) == pytest.approx(
        1.0, 1e-2)
    assert fit_result.value(y0) / np.mean(data[:, 1]) == pytest.approx(
        1.0, 1e-2)
    assert np.abs(fit_result.value(sig_x)) / np.std(
        data[:, 0]) == pytest.approx(1.0, 1e-2)
    assert np.abs(fit_result.value(sig_y)) / np.std(
        data[:, 1]) == pytest.approx(1.0, 1e-2)
    assert background / fit_result.value(b) == pytest.approx(1.0, 1e-1)
    assert fit_result.r_squared >= 0.96
Example 7
    def setup_method(self):
        x = Variable('x')
        y = Variable('y')
        xmin, xmax = -5, 5
        self.x0_1 = Parameter('x01', value=0, min=xmin, max=xmax)
        self.sig_x_1 = Parameter('sigx1', value=0, min=0.0, max=1)
        self.y0_1 = Parameter('y01', value=0, min=xmin, max=xmax)
        self.sig_y_1 = Parameter('sigy1', value=0, min=0.0, max=1)
        self.A_1 = Parameter('A1', min=0, max=1000)
        g_1 = self.A_1 * Gaussian(x, self.x0_1, self.sig_x_1) * Gaussian(
            y, self.y0_1, self.sig_y_1)

        self.model = GradientModel(g_1)
Example 8
def test_gaussian_2d_fitting():
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables.
    """
    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
    cov = [[0.2**2, 0], [0, 0.1**2]]

    # TODO: Since we bin this data into 100 bins later on, just evaluate 100
    #       points on a Gaussian, and add an appropriate amount of noise. This
    #       burns CPU cycles without good reason.
    data = np.random.multivariate_normal(mean, cov, 1000000)

    # Insert them as y,x here, since np.histogram2d does not follow
    # Cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 0],
                                           data[:, 1],
                                           bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter(value=mean[0], min=0.0, max=1.0)
    sig_x = Parameter(value=0.2, min=0.0, max=0.3)
    y0 = Parameter(value=mean[1], min=0.0, max=1.0)
    sig_y = Parameter(value=0.1, min=0.0, max=0.3)
    A = Parameter(value=np.mean(ydata), min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')

    model = GradientModel(
        {g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    assert fit_result.value(x0) == pytest.approx(np.mean(data[:, 0]), 1e-3)
    assert fit_result.value(y0) == pytest.approx(np.mean(data[:, 1]), 1e-3)
    assert np.abs(fit_result.value(sig_x)) == pytest.approx(
        np.std(data[:, 0]), 1e-2)
    assert np.abs(fit_result.value(sig_y)) == pytest.approx(
        np.std(data[:, 1]), 1e-2)
    assert fit_result.r_squared >= 0.96
Example 9
def test_error_advanced():
    """
    Models an example from the Mathematica docs and tries to replicate it
    using both symfit and scipy's curve_fit.
    http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html
    """
    data = [[0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6], [1., 2.2, 5.9],
            [1.8, 2.4, 7.2], [9., 1.7, 7.], [7.9, 8., 10.4], [4.9, 3.9, 9.],
            [2.3, 2.6, 7.4], [4.7, 8.4, 10.]]
    xdata, ydata, zdata = [np.array(data) for data in zip(*data)]
    xy = np.vstack((xdata, ydata))
    errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2])

    a = Parameter('a', value=3.0)
    b = Parameter('b', value=0.9)
    c = Parameter('c', value=5)
    x = Variable('x')
    y = Variable('y')
    z = Variable('z')
    model = {z: a * log(b * x + c * y)}

    # Use a gradient model because Mathematica uses the Hessian
    # approximation instead of the exact Hessian.
    model = GradientModel(model)
    fit = Fit(model, x=xdata, y=ydata, z=zdata, absolute_sigma=False)
    fit_result = fit.execute()

    # Same as Mathematica default behavior.
    assert fit_result.value(a) == pytest.approx(2.9956, 1e-4)
    assert fit_result.value(b) == pytest.approx(0.563212, 1e-4)
    assert fit_result.value(c) == pytest.approx(3.59732, 1e-4)
    assert fit_result.stdev(a) == pytest.approx(0.278304, 1e-4)
    assert fit_result.stdev(b) == pytest.approx(0.224107, 1e-4)
    assert fit_result.stdev(c) == pytest.approx(0.980352, 1e-4)

    fit = Fit(model, xdata, ydata, zdata, absolute_sigma=True)
    fit_result = fit.execute()
    # Same as Mathematica in measurement-error mode, but without supplying
    # any errors.
    assert fit_result.value(a) == pytest.approx(2.9956, 1e-4)
    assert fit_result.value(b) == pytest.approx(0.563212, 1e-4)
    assert fit_result.value(c) == pytest.approx(3.59732, 1e-4)
    assert fit_result.stdev(a) == pytest.approx(0.643259, 1e-4)
    assert fit_result.stdev(b) == pytest.approx(0.517992, 1e-4)
    assert fit_result.stdev(c) == pytest.approx(2.26594, 1e-4)

    fit = Fit(model, xdata, ydata, zdata, sigma_z=errors)
    fit_result = fit.execute()

    popt, pcov, infodict, errmsg, ier = curve_fit(
        lambda x_vec, a, b, c: a * np.log(b * x_vec[0] + c * x_vec[1]),
        xy,
        zdata,
        sigma=errors,
        absolute_sigma=True,
        full_output=True)

    # Same as curve_fit?
    assert fit_result.value(a) == pytest.approx(popt[0], 1e-4)
    assert fit_result.value(b) == pytest.approx(popt[1], 1e-4)
    assert fit_result.value(c) == pytest.approx(popt[2], 1e-4)
    assert fit_result.stdev(a) == pytest.approx(np.sqrt(pcov[0, 0]), 1e-4)
    assert fit_result.stdev(b) == pytest.approx(np.sqrt(pcov[1, 1]), 1e-4)
    assert fit_result.stdev(c) == pytest.approx(np.sqrt(pcov[2, 2]), 1e-4)

    # Same as Mathematica with MEASUREMENT ERROR
    assert fit_result.value(a) == pytest.approx(2.68807, 1e-4)
    assert fit_result.value(b) == pytest.approx(0.941344, 1e-4)
    assert fit_result.value(c) == pytest.approx(5.01541, 1e-4)
    assert fit_result.stdev(a) == pytest.approx(0.0974628, 1e-4)
    assert fit_result.stdev(b) == pytest.approx(0.247018, 1e-4)
    assert fit_result.stdev(c) == pytest.approx(0.597661, 1e-4)
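
The absolute_sigma flag used above follows scipy.optimize.curve_fit semantics: with absolute_sigma=False the parameter covariance is rescaled by the reduced chi-squared, so the standard deviations change but the best-fit values do not. A minimal curve_fit-only sketch of that relationship, on toy linear data rather than the Mathematica example:

import numpy as np
from scipy.optimize import curve_fit

def f(x, a, b):
    return a * x + b

rng = np.random.default_rng(1)
x = np.linspace(0, 10, 50)
y = 2.0 * x + 1.0 + rng.normal(0, 0.5, x.size)
sigma = np.full_like(x, 0.5)

popt_t, pcov_t = curve_fit(f, x, y, sigma=sigma, absolute_sigma=True)
popt_f, pcov_f = curve_fit(f, x, y, sigma=sigma, absolute_sigma=False)

# Identical estimates; covariances differ by the reduced chi-squared.
s_sq = np.sum(((y - f(x, *popt_t)) / sigma)**2) / (x.size - len(popt_t))
assert np.allclose(popt_t, popt_f)
assert np.allclose(pcov_f, pcov_t * s_sq)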
Example 10
def test_constrained_dependent_on_model():
    """
    For a simple Gaussian distribution, we test if Models of various types
    can be used as constraints. Of particular interest are NumericalModels,
    which can be used to fix the integral of the model during the fit to 1,
    as it should be for a probability distribution.
    :return:
    """
    A, mu, sig = parameters('A, mu, sig')
    x, y, Y = variables('x, y, Y')
    i = Idx('i', (0, 1000))
    sig.min = 0.0

    model = GradientModel({y: A * Gaussian(x, mu=mu, sig=sig)})

    # Generate data, 1000 samples from a N(1.2, 2) distribution
    np.random.seed(2)
    xdata = np.random.normal(1.2, 2, 1000)
    ydata, xedges = np.histogram(xdata,
                                 bins=int(np.sqrt(len(xdata))),
                                 density=True)
    xcentres = (xedges[1:] + xedges[:-1]) / 2

    # Unconstrained fit
    fit = Fit(model, x=xcentres, y=ydata)
    unconstr_result = fit.execute()

    # Constraints must be scalar models.
    with pytest.raises(ModelError):
        Model.as_constraint([A - 1, sig - 1], model, constraint_type=Eq)

    constraint_exact = Model.as_constraint(A * sqrt(2 * sympy.pi) * sig - 1,
                                           model,
                                           constraint_type=Eq)
    # Only when explicitly asked, do models behave as constraints.
    assert hasattr(constraint_exact, 'constraint_type')
    assert constraint_exact.constraint_type == Eq
    assert not hasattr(model, 'constraint_type')

    # Now let's make some valid constraints and see if they are respected!
    # FIXME: The first two should be symbolic integrals over `y` instead,
    # but currently this is not converted into a numpy/scipy function, so
    # the first two are not valid constraints.
    constraint_model = Model.as_constraint(A - 1, model, constraint_type=Eq)
    constraint_exact = Eq(A, 1)
    constraint_num = CallableNumericalModel.as_constraint(
        {
            Y: lambda x, y: simps(y, x) - 1
        },  # Integrate using simps
        model=model,
        connectivity_mapping={Y: {x, y}},
        constraint_type=Eq)

    # Test for all these different types of constraint.
    for constraint in [constraint_model, constraint_exact, constraint_num]:
        if not isinstance(constraint, Eq):
            assert constraint.constraint_type == Eq

        xcentres = (xedges[1:] + xedges[:-1]) / 2
        fit = Fit(model, x=xcentres, y=ydata, constraints=[constraint])
        # Test if conversion into a constraint was done properly
        fit_constraint = fit.constraints[0]
        assert fit.model.params == fit_constraint.params
        assert fit_constraint.constraint_type == Eq

        con_map = fit_constraint.connectivity_mapping
        if isinstance(constraint, CallableNumericalModel):
            assert con_map == {Y: {x, y}, y: {x, mu, sig, A}}
            assert fit_constraint.independent_vars == [x]
            assert fit_constraint.dependent_vars == [Y]
            assert fit_constraint.interdependent_vars == [y]
            assert fit_constraint.params == [A, mu, sig]
        else:
            # TODO if these constraints can somehow be written as integrals
            # depending on y and x this if/else should be removed.
            assert con_map == {fit_constraint.dependent_vars[0]: {A}}
            assert fit_constraint.independent_vars == []
            assert len(fit_constraint.dependent_vars) == 1
            assert fit_constraint.interdependent_vars == []
            assert fit_constraint.params == [A, mu, sig]

        # Finally, test if the constraint worked
        fit_result = fit.execute(options={'eps': 1e-15, 'ftol': 1e-10})
        unconstr_value = fit.minimizer.wrapped_constraints[0]['fun'](
            **unconstr_result.params)
        constr_value = fit.minimizer.wrapped_constraints[0]['fun'](
            **fit_result.params)

        # TODO: due to a pytest bug we have to check it like this
        assert constr_value[0] == pytest.approx(0, abs=1e-10)
    # And check that it was badly violated before the constrained fit
    assert not unconstr_value[0] == pytest.approx(0.0, 1e-1)
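
The numerical constraint in this test drives a Simpson-rule integral of the model to 1. For reference, this is the residual it evaluates; note that simps is the legacy scipy name, with scipy.integrate.simpson being the current one:

import numpy as np
from scipy.integrate import simpson

x = np.linspace(-5, 5, 101)
y = np.exp(-x**2 / 2) / np.sqrt(2 * np.pi)  # unit-area Gaussian curve

# The CallableNumericalModel constraint is this quantity, driven to zero.
residual = simpson(y, x=x) - 1
print(residual)  # ~0 for a normalized curve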
Example 11
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from symfit import Parameter, Variable, Fit, GradientModel
from symfit.distributions import Gaussian

palette = sns.color_palette()

x = Variable('x')
y = Variable('y')
A = Parameter('A')
sig = Parameter(name='sig', value=1.4, min=1.0, max=2.0)
x0 = Parameter(name='x0', value=15.0, min=0.0)

# Gaussian distribution
model = GradientModel({y: A * Gaussian(x, x0, sig)})

# Sample 10000 points from a N(15.0, 1.5) distribution
np.random.seed(seed=123456789)
sample = np.random.normal(loc=15.0, scale=1.5, size=(10000, ))
ydata, bin_edges = np.histogram(sample, 100)
xdata = (bin_edges[1:] + bin_edges[:-1]) / 2

fit = Fit(model, xdata, ydata)
fit_result = fit.execute()

y_fit, = model(x=xdata, **fit_result.params)
sns.regplot(x=xdata, y=ydata, fit_reg=False)
plt.plot(xdata, y_fit, color=palette[2])
plt.ylim(0, 400)
plt.show()