Exemple #1
0
def test_LeastSquares():
    """
    Tests if the LeastSquares objective gives the right shapes of output by
    comparing with its analytical equivalent.

    The analytical equivalent is the chi-squared written out symbolically
    with ``FlattenSum``. Its value, jacobian and hessian are compared with
    those of the ``LeastSquares`` objective, and at the end both are fitted
    to the same data and the fit results are compared.
    """
    i = Idx('i', 100)
    x, y = symbols('x, y', cls=Variable)
    X2 = symbols('X2', cls=Variable)
    a, b = parameters('a, b')

    model = Model({y: a * x**2 + b * x})
    xdata = np.linspace(0, 10, 100)
    # NOTE(review): the noise is drawn without seeding the RNG in this test,
    # so the generated data differ from run to run.
    ydata = model(x=xdata, a=5, b=2).y + np.random.normal(0, 5, xdata.shape)

    # Construct a LeastSquares objective and its analytical equivalent
    chi2_numerical = LeastSquares(model,
                                  data={
                                      x: xdata,
                                      y: ydata,
                                      model.sigmas[y]: np.ones_like(xdata)
                                  })
    chi2_exact = Model({X2: FlattenSum(0.5 * ((a * x**2 + b * x) - y)**2, i)})

    eval_exact = chi2_exact(x=xdata, y=ydata, a=2, b=3)
    jac_exact = chi2_exact.eval_jacobian(x=xdata, y=ydata, a=2, b=3)
    hess_exact = chi2_exact.eval_hessian(x=xdata, y=ydata, a=2, b=3)
    eval_numerical = chi2_numerical(x=xdata, a=2, b=3)
    jac_numerical = chi2_numerical.eval_jacobian(x=xdata, a=2, b=3)
    hess_numerical = chi2_numerical.eval_hessian(x=xdata, a=2, b=3)

    # Test model jacobian and hessian shape
    assert model(x=xdata, a=2, b=3)[0].shape == ydata.shape
    assert model.eval_jacobian(x=xdata, a=2, b=3)[0].shape == (2, 100)
    assert model.eval_hessian(x=xdata, a=2, b=3)[0].shape == (2, 2, 100)
    # Test exact chi2 shape. These must be `==` comparisons: written as
    # `assert shape, (1, )` the tuple is only the assertion *message* and the
    # check passes for any non-empty shape.
    assert eval_exact[0].shape == (1, )
    assert jac_exact[0].shape == (2, 1)
    assert hess_exact[0].shape == (2, 2, 1)

    # Test if these two models have the same call, jacobian, and hessian
    assert eval_exact[0] == pytest.approx(eval_numerical)
    assert isinstance(eval_numerical, float)
    assert isinstance(eval_exact[0][0], float)
    # The exact model carries a trailing axis of length one; drop it before
    # comparing with the objective's output.
    assert np.squeeze(jac_exact[0], axis=-1) == pytest.approx(jac_numerical)
    assert isinstance(jac_numerical, np.ndarray)
    assert np.squeeze(hess_exact[0], axis=-1) == pytest.approx(hess_numerical)
    assert isinstance(hess_numerical, np.ndarray)

    fit = Fit(chi2_exact, x=xdata, y=ydata, objective=MinimizeModel)
    fit_exact_result = fit.execute()
    fit = Fit(model, x=xdata, y=ydata, absolute_sigma=True)
    fit_num_result = fit.execute()
    # NOTE(review): the best-fit values are compared with exact float
    # equality, while the standard deviations use pytest.approx -- confirm
    # that the exact comparison is intended.
    assert fit_exact_result.value(a) == fit_num_result.value(a)
    assert fit_exact_result.value(b) == fit_num_result.value(b)
    assert fit_exact_result.stdev(a) == pytest.approx(fit_num_result.stdev(a))
    assert fit_exact_result.stdev(b) == pytest.approx(fit_num_result.stdev(b))
Exemple #2
0
def test_interdependency_constrained():
    """
    Test a model whose components depend on one another.

    The example is a Tikhonov regularization written in the MatrixSymbol
    formalism. The matrix inverse is split off into its own component ``W``,
    with the intention that it only has to be computed once per model call.
    The model output is compared with a manual numpy calculation, after
    which the model is fitted to see whether ``a`` is recovered.

    See https://arxiv.org/abs/1901.05348 for a more detailed background.
    """
    N = Symbol('N', integer=True)
    M = MatrixSymbol('M', N, N)
    W = MatrixSymbol('W', N, N)
    I = MatrixSymbol('I', N, N)
    y = MatrixSymbol('y', N, 1)
    c = MatrixSymbol('c', N, 1)
    a, = parameters('a')
    z, = variables('z')
    i = Idx('i')

    # Sympy currently does not support derivatives of matrix expressions,
    # which is why this is a CallableModel rather than a Model.
    model = CallableModel({
        W: Inverse(I + M / a**2),
        c: -W * y,
        z: sqrt(c.T * c),
    })

    # Input data, and the parameter value the fit is expected to recover
    a_true = 0.1
    identity = np.eye(2)
    m_data = np.array([[2, 1], [3, 4]])
    y_data = np.array([[3], [5]])
    model_output = model(I=identity, M=m_data, y=y_data, a=a_true)

    # Reference answers, worked out directly with numpy
    w_ref = np.linalg.inv(identity + m_data / a_true**2)
    c_ref = -np.atleast_2d(np.dot(w_ref, y_data))
    z_ref = np.atleast_1d(np.sqrt(np.dot(c_ref.T, c_ref)))

    expected_shapes = [
        (y_data, (2, 1)),
        (m_data, (2, 2)),
        (identity, (2, 2)),
        (w_ref, (2, 2)),
        (c_ref, (2, 1)),
        (z_ref, (1, 1)),
    ]
    for array, shape in expected_shapes:
        assert array.shape == shape

    assert w_ref == pytest.approx(model_output.W)
    assert c_ref == pytest.approx(model_output.c)
    assert z_ref == pytest.approx(model_output.z)

    fit_result = Fit(model, z=z_ref, I=identity, M=m_data, y=y_data).execute()

    # Only a**2 features in the equations, so the sign of `a` is not fixed
    # and the absolute value is checked instead. Setting a.min = 0.0 is not
    # appreciated by the Minimizer, it seems.
    assert np.abs(fit_result.value(a)) == pytest.approx(a_true)
Exemple #3
0
def test_LogLikelihood():
    """
    Tests if the LogLikelihood objective agrees with its analytical
    equivalent.

    The negative log-likelihood is written out symbolically with
    ``FlattenSum`` and evaluated both directly and through ``MinimizeModel``.
    Its value, jacobian and hessian are compared with those of the
    ``LogLikelihood`` objective, and both formulations are fitted at the end.
    """
    # TODO: update these tests to use indexed variables in the future
    a, b = parameters('a, b')
    i = Idx('i', 100)
    x, y = variables('x, y')
    pdf = Exp(x, 1 / a) * Exp(x, b)

    np.random.seed(10)
    xdata = np.random.exponential(3.5, 100)

    # The exact model is *minus* the log-likelihood: the objective was
    # designed to find the maximum when used with a *minimizer*, so it has
    # the opposite sign. MinimizeModel is tested along the way.
    logL_model = Model({y: pdf})
    logL_exact = Model({y: -FlattenSum(log(pdf), i)})
    logL_numerical = LogLikelihood(logL_model, {x: xdata, y: None})
    logL_minmodel = MinimizeModel(logL_exact, data={x: xdata, y: None})

    # Evaluate everything at one and the same point in parameter space
    point = {'a': 2, 'b': 3}
    exact_val = logL_exact(x=xdata, **point)
    exact_jac = logL_exact.eval_jacobian(x=xdata, **point)
    exact_hess = logL_exact.eval_hessian(x=xdata, **point)
    minmodel_val = logL_minmodel(**point)
    minmodel_jac = logL_minmodel.eval_jacobian(**point)
    minmodel_hess = logL_minmodel.eval_hessian(**point)
    numerical_val = logL_numerical(**point)
    numerical_jac = logL_numerical.eval_jacobian(**point)
    numerical_hess = logL_numerical.eval_hessian(**point)

    # TODO: These shapes should not have the ones! This is due to the current
    # convention that scalars should be returned as a 1d array by Model's.
    assert exact_val[0].shape == (1, )
    assert exact_jac[0].shape == (2, 1)
    assert exact_hess[0].shape == (2, 2, 1)

    # The exact model has to be identical to its MinimizeModel wrapper
    assert exact_val[0] == pytest.approx(minmodel_val)
    assert exact_jac[0] == pytest.approx(minmodel_jac)
    assert exact_hess[0] == pytest.approx(minmodel_hess)

    # The exact model and the objective have to agree on call, jacobian and
    # hessian. Models always have components as their first dimension, so
    # that one is sliced away; the trailing axis of length one is squeezed.
    assert exact_val.y == pytest.approx(numerical_val)
    assert isinstance(numerical_val, float)
    assert isinstance(exact_val.y[0], float)
    assert np.squeeze(exact_jac[0], axis=-1) == pytest.approx(numerical_jac)
    assert isinstance(numerical_jac, np.ndarray)
    assert np.squeeze(exact_hess[0], axis=-1) == pytest.approx(numerical_hess)
    assert isinstance(numerical_hess, np.ndarray)

    # Fitting either formulation should give the same answer
    exact_result = Fit(logL_exact, x=xdata, objective=MinimizeModel).execute()
    num_result = Fit(logL_model, x=xdata, objective=LogLikelihood).execute()
    for par in (a, b):
        assert exact_result.value(par) == pytest.approx(num_result.value(par))
    for par in (a, b):
        assert exact_result.stdev(par) == pytest.approx(num_result.stdev(par))
Exemple #4
0
def test_constrained_dependent_on_matrixmodel():
    """
    Counterpart of test_constrained_dependent_on_model that uses
    MatrixSymbols, so that the constraint itself can be written down
    symbolically as well.

    A Gaussian is fitted to a normalized histogram, first without and then
    with an equality constraint on the sum of ``y * dx``.
    """
    A, mu, sig = parameters('A, mu, sig')
    M = symbols('M', integer=True)  # Number of measurements

    # Every quantity is a column vector
    x = MatrixSymbol('x', M, 1)
    dx = MatrixSymbol('dx', M, 1)
    y = MatrixSymbol('y', M, 1)
    I = MatrixSymbol('I', M, 1)  # 'identity' vector
    Y = MatrixSymbol('Y', 1, 1)
    B = MatrixSymbol('B', M, 1)
    i = Idx('i', M)

    def check_constraint(con):
        # Expected bookkeeping of the constraint once it is tied to the model
        assert con.independent_vars == [I, M, dx, x]
        assert con.dependent_vars == [Y]
        assert con.interdependent_vars == [B, y]
        assert con.params == [A, mu, sig]
        assert con.constraint_type == Eq

    # Looks overly complicated, but it's just a simple Gaussian
    gaussian = (A * sympy.exp(-HadamardProduct(B, B) / (2 * sig**2)) /
                sympy.sqrt(2 * sympy.pi * sig**2))
    model = CallableModel({y: gaussian, B: (x - mu * I)})
    assert model.independent_vars == [I, x]
    assert model.dependent_vars == [y]
    assert model.interdependent_vars == [B]
    assert model.params == [A, mu, sig]

    # Generate data, sample from a N(1.2, 2) distribution. Has to be 2D.
    np.random.seed(2)
    # TODO: sample points on a Gaussian and add appropriate noise.
    samples = np.random.normal(1.2, 2, size=10000)
    n_bins = int(np.sqrt(len(samples)))
    density, edges = np.histogram(samples, bins=n_bins, density=True)
    xcentres = np.atleast_2d((edges[1:] + edges[:-1]) / 2).T
    xdiff = np.atleast_2d((edges[1:] - edges[:-1])).T
    ydata = np.atleast_2d(density).T
    Idata = np.ones_like(xcentres)

    for column in (xcentres, xdiff, ydata):
        assert column.shape == (n_bins, 1)

    unconstr_result = Fit(model, x=xcentres, y=ydata, I=Idata).execute()

    # Data handed to every constrained Fit below, including the extra
    # entries that only the constraint needs.
    fit_kwargs = dict(x=xcentres, y=ydata, dx=xdiff, M=len(xcentres), I=Idata)
    constraint_expr = Sum(y[i, 0] * dx[i, 0], i) - 1

    # A plain CallableModel passed as a constraint is expected to be
    # rejected with a ModelError.
    constraint = CallableModel({Y: constraint_expr})
    with pytest.raises(ModelError):
        Fit(model, constraints=[constraint], **fit_kwargs)

    constraint = CallableModel.as_constraint({Y: constraint_expr},
                                             model=model,
                                             constraint_type=Eq)
    check_constraint(constraint)

    fit = Fit(model, constraints=[constraint], **fit_kwargs)

    # After treatment, the constraint should have `y` & `B` dependencies
    check_constraint(fit.constraints[0])
    assert isinstance(fit.objective, LeastSquares)
    assert isinstance(fit.minimizer.constraints[0], MinimizeModel)

    provided = {k for k, v in fit.data.items() if v is not None}
    assert provided == {x, y, dx, M, I, fit.model.sigmas[y]}
    # These belong to internal variables
    internal = {k for k, v in fit.data.items() if v is None}
    assert internal == {constraint.sigmas[Y], Y}

    constr_result = fit.execute()
    # The constraint should not be met for the unconstrained fit
    # NOTE(review): the second positional argument of pytest.approx is
    # `rel`; relative to an expected value of 0 that tolerance is zero, so
    # `abs=1e-3` may have been intended -- confirm before changing.
    unconstr_value = fit.minimizer.wrapped_constraints[0]['fun'](
        **unconstr_result.params)[0]
    assert not unconstr_value == pytest.approx(0, 1e-3)
    # And at high precision with constraint
    # TODO Change after resolve bug at pytest
    constr_value = fit.minimizer.wrapped_constraints[0]['fun'](
        **constr_result.params)[0]
    assert constr_value == pytest.approx(0, abs=1e-8)

    # Constraining will negatively affect the R^2 value, but...
    assert constr_result.r_squared < unconstr_result.r_squared
    # both should be pretty good
    assert constr_result.r_squared > 0.99
Exemple #5
0
def test_constrained_dependent_on_model():
    """
    For a simple Gaussian distribution, we test if Models of various types
    can be used as constraints. Of particular interest are NumericalModels,
    which can be used to fix the integral of the model during the fit to 1,
    as it should be for a probability distribution.

    Each constraint is attached to a fit of the same histogram data; the
    test checks how the constraint was converted and that it is met after
    fitting.
    """
    A, mu, sig = parameters('A, mu, sig')
    x, y, Y = variables('x, y, Y')
    sig.min = 0.0

    model = GradientModel({y: A * Gaussian(x, mu=mu, sig=sig)})

    # Generate data, 1000 samples from a N(1.2, 2) distribution
    np.random.seed(2)
    xdata = np.random.normal(1.2, 2, 1000)
    ydata, xedges = np.histogram(xdata,
                                 bins=int(np.sqrt(len(xdata))),
                                 density=True)
    # Bin centres; computed once and reused by every fit below.
    xcentres = (xedges[1:] + xedges[:-1]) / 2

    # Unconstrained fit
    fit = Fit(model, x=xcentres, y=ydata)
    unconstr_result = fit.execute()

    # Constraints must be scalar models.
    with pytest.raises(ModelError):
        Model.as_constraint([A - 1, sig - 1], model, constraint_type=Eq)

    constraint_norm = Model.as_constraint(A * sqrt(2 * sympy.pi) * sig - 1,
                                          model,
                                          constraint_type=Eq)
    # Only when explicitly asked, do models behave as constraints.
    assert hasattr(constraint_norm, 'constraint_type')
    assert constraint_norm.constraint_type == Eq
    assert not hasattr(model, 'constraint_type')

    # Now lets make some valid constraints and see if they are respected!
    # FIXME These first two should be symbolical integrals over `y` instead,
    # but currently this is not converted into a numpy/scipy function. So
    # instead the first two are not valid constraints.
    constraint_model = Model.as_constraint(A - 1, model, constraint_type=Eq)
    constraint_exact = Eq(A, 1)
    # NOTE(review): `simps` is presumably scipy.integrate.simps, which newer
    # SciPy releases replace with `simpson`; the import is outside this
    # block -- verify against the installed SciPy version.
    constraint_num = CallableNumericalModel.as_constraint(
        {
            Y: lambda x, y: simps(y, x) - 1
        },  # Integrate using simps
        model=model,
        connectivity_mapping={Y: {x, y}},
        constraint_type=Eq)

    # Test for all these different types of constraint.
    for constraint in [constraint_model, constraint_exact, constraint_num]:
        # A bare sympy Eq carries no `constraint_type` attribute of its own.
        if not isinstance(constraint, Eq):
            assert constraint.constraint_type == Eq

        fit = Fit(model, x=xcentres, y=ydata, constraints=[constraint])
        # Test if conversion into a constraint was done properly
        fit_constraint = fit.constraints[0]
        assert fit.model.params == fit_constraint.params
        assert fit_constraint.constraint_type == Eq

        con_map = fit_constraint.connectivity_mapping
        if isinstance(constraint, CallableNumericalModel):
            assert con_map == {Y: {x, y}, y: {x, mu, sig, A}}
            assert fit_constraint.independent_vars == [x]
            assert fit_constraint.dependent_vars == [Y]
            assert fit_constraint.interdependent_vars == [y]
            assert fit_constraint.params == [A, mu, sig]
        else:
            # TODO if these constraints can somehow be written as integrals
            # depending on y and x this if/else should be removed.
            assert con_map == {fit_constraint.dependent_vars[0]: {A}}
            assert fit_constraint.independent_vars == []
            assert len(fit_constraint.dependent_vars) == 1
            assert fit_constraint.interdependent_vars == []
            assert fit_constraint.params == [A, mu, sig]

        # Finally, test if the constraint worked
        fit_result = fit.execute(options={'eps': 1e-15, 'ftol': 1e-10})
        unconstr_value = fit.minimizer.wrapped_constraints[0]['fun'](
            **unconstr_result.params)
        constr_value = fit.minimizer.wrapped_constraints[0]['fun'](
            **fit_result.params)

        # TODO because of a bug by pytest we have to solve it like this
        assert constr_value[0] == pytest.approx(0, abs=1e-10)
    # And if it was very poorly met before
    # NOTE(review): this runs after the loop, so only the value belonging to
    # the last constraint is checked. The second positional argument of
    # pytest.approx is `rel`, which is zero relative to an expected 0.0, so
    # `abs=1e-1` may have been intended -- confirm before changing.
    assert not unconstr_value[0] == pytest.approx(0.0, 1e-1)