def test_model_callable(self):
    """
    Check that ``Model`` objects can be called like ordinary functions.

    Calling a model evaluates its expression(s) for the given values and
    returns the results as a tuple. The signature of the model must also
    be available for inspection, listing ``x, y, a, b`` in that order.
    """
    expected_arg_order = ('x', 'y', 'a', 'b')

    def check_signature(callable_model):
        # Compare the inspected argument names pairwise with the
        # expected order.
        sig_names = inspect_sig.signature(callable_model).parameters
        for expected, actual in zip(expected_arg_order, sig_names):
            self.assertEqual(expected, actual)

    a, b = parameters('a, b')
    x, y = variables('x, y')

    # Model built from a single bare expression.
    expr = a*x**2 + b*y**2
    model = Model(expr)
    result = model(3, 3, 2, 2)
    self.assertIsInstance(result, tuple)
    z, = result
    self.assertEqual(z, 36)
    check_signature(model)

    # Model built from a list of expressions.
    model = Model([
        a*x**2,
        4*b*y**2,
        a*x**2 + b*y**2
    ])
    out_1, out_2, out_3 = model(3, 3, 2, 2)
    self.assertEqual(out_1, 18)
    self.assertEqual(out_2, 72)
    self.assertEqual(out_3, 36)
    check_signature(model)

    # Model built from a dict that maps dependent variables to expressions.
    z_1, z_2, z_3 = variables('z_1, z_2, z_3')
    model = Model({
        z_1: a*x**2,
        z_2: 4*b*y**2,
        z_3: a*x**2 + b*y**2
    })
    out_1, out_2, out_3 = model(3, 3, 2, 2)
    self.assertEqual(out_1, 18)
    self.assertEqual(out_2, 72)
    self.assertEqual(out_3, 36)
    check_signature(model)
def test_vector_none_fitting(self):
    """
    Fit a three-component vector model while the data for one of the
    dependent variables is ``None``; no bounds or guesses are given.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])
    data_a, data_b, data_c = xdata

    # Same model twice: once without data for c_i, once with all data.
    fit_none = NumericalLeastSquares(
        model=model, a_i=data_a, b_i=data_b, c_i=None,
    )
    fit = NumericalLeastSquares(
        model=model, a_i=data_a, b_i=data_b, c_i=data_c,
    )
    fit_none_result = fit_none.execute()
    fit_result = fit.execute()

    # Parameters with data must agree between both fits.
    for param in (a, b):
        self.assertAlmostEqual(
            fit_none_result.value(param), fit_result.value(param), 4
        )
    # The parameter without data should be unchanged.
    self.assertAlmostEqual(fit_none_result.value(c), 1.0)
def test_single_eval(self):
    """
    Evaluate an ODEModel at single time points instead of a whole vector
    and compare against the vectorised evaluation.
    """
    x, y, t = variables('x, y, t')
    k, = parameters('k')

    # The harmonic oscillator written as a system of first order ODEs;
    # higher than first order is not supported yet.
    harmonic_dict = {
        D(x, t): - k * y,
        D(y, t): k * x,
    }

    # Two separate model instances prevent caching of integration results
    # between them. This is also why harmonic_dict must NOT be a Model.
    initial_state = {t: 0.0, x: 1.0, y: 0.0}
    harmonic_model_array = ODEModel(harmonic_dict, initial=dict(initial_state))
    harmonic_model_points = ODEModel(harmonic_dict, initial=dict(initial_state))

    tdata = np.linspace(0, 100, 101)
    X, Y = harmonic_model_array(t=tdata, k=0.1)

    # Visit the time points in random order so that the result at time t
    # cannot be reused to compute t + dt.
    for idx in np.random.permutation(len(tdata)):
        t_point = tdata[idx]
        X_point, Y_point = harmonic_model_points(t=t_point, k=0.1)
        self.assertAlmostEqual(X_point[0], X[idx])
        self.assertAlmostEqual(Y_point[0], Y[idx])
def test_simple_kinetics(self):
    """
    Fit simple kinetics data with an ODEModel and compare the outcome with
    a fit of the analytical solution.
    """
    tdata = np.array([10, 26, 44, 70, 120])
    adata = 10e-4 * np.array([44, 34, 27, 20, 14])
    a, b, t = variables('a, b, t')
    k, a0 = parameters('k, a0')
    k.value = 0.01
    # a0 is deliberately replaced by a plain number instead of being fitted.
    a0 = 54 * 10e-4

    model_dict = {
        D(a, t): - k * a**2,
        D(b, t): k * a**2,
    }
    ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, b: 0.0})

    # Analytical solution, used as the reference fit.
    model = GradientModel({a: 1 / (k * t + 1 / a0)})
    fit_result = Fit(model, t=tdata, a=adata).execute()

    # First with the MINPACK minimizer, then with the default one.
    for extra_kwargs in ({'minimizer': MINPACK}, {}):
        fit = Fit(ode_model, t=tdata, a=adata, b=None, **extra_kwargs)
        ode_result = fit.execute()
        self.assertAlmostEqual(
            ode_result.value(k) / fit_result.value(k), 1.0, 4
        )
        self.assertAlmostEqual(
            ode_result.stdev(k) / fit_result.stdev(k), 1.0, 4
        )
        self.assertAlmostEqual(
            ode_result.r_squared / fit_result.r_squared, 1, 4
        )
def test_known_solution(self):
    """
    Fit an ODE whose exact solution is known and compare the ODE fit with
    a fit of that exact solution to the same noisy data.
    """
    # The former ``c1`` parameter was never part of any model, so it has
    # been dropped.
    p, = parameters('p')
    y, t = variables('y, t')
    p.value = 3.0

    model_dict = {
        D(y, t): - p * y,
    }

    # Let's say we know the exact solution to this problem.
    sol = Model({y: exp(- p * t)})

    # Generate noisy data from the exact solution.
    tdata = np.linspace(0, 3, 10001)
    ydata = sol(t=tdata, p=3.22)[0]
    ydata += np.random.normal(0, 0.005, ydata.shape)

    # The first data point serves as the initial condition of the ODE.
    ode_model = ODEModel(model_dict, initial={t: 0.0, y: ydata[0]})
    fit = Fit(ode_model, t=tdata, y=ydata)
    ode_result = fit.execute()

    fit = Fit(sol, t=tdata, y=ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(ode_result.value(p) / fit_result.value(p), 1, 2)
    self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4)
    self.assertAlmostEqual(ode_result.stdev(p) / fit_result.stdev(p), 1, 3)
def test_vector_fitting(self):
    """
    Fit a three-component vector valued function without bounds or
    guesses and compare with the stored reference values.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit = NumericalLeastSquares(
        model=model, a_i=xdata[0], b_i=xdata[1], c_i=xdata[2],
    )
    fit_result = fit.execute()

    # (parameter, expected value, number of decimal places)
    expectations = (
        (a, 9.985691, 6),
        (b, 1.006143e+02, 4),
        (c, 7.085713e+01, 5),
    )
    for param, expected, places in expectations:
        self.assertAlmostEqual(fit_result.value(param), expected, places)
def test_global_fitting(self):
    """
    Check which fitting backend ``Fit`` selects for a given model.

    When the components of the model share parameters, ``Fit`` should use
    ``ConstrainedNumericalLeastSquares``. A vector model without shared
    parameters and a scalar model should use ``NumericalLeastSquares``.
    """
    x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
    y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')

    # Both components share y0, which links the equations together.
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
    })
    self.assertTrue(model.shared_parameters)

    # Generate data from this model.
    xdata1 = np.linspace(0, 10)
    xdata2 = xdata1[::2]  # Only every other point.
    ydata1, ydata2 = model(
        x_1=xdata1, x_2=xdata2,
        a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8
    )
    # Add some noise to make it look like real data.
    np.random.seed(1)
    ydata1 += np.random.normal(0, 2, size=ydata1.shape)
    ydata2 += np.random.normal(0, 2, size=ydata2.shape)

    # Initial guesses.
    for param, guess in ((a_1, 100), (a_2, 50), (b_1, 1), (b_2, 1), (y0, 10)):
        param.value = guess

    vector_data = dict(x_1=xdata1, x_2=xdata2, y_1=ydata1, y_2=ydata2)

    fit = Fit(model, **vector_data)
    self.assertIsInstance(fit.fit, ConstrainedNumericalLeastSquares)

    # The next model does not share parameters, but is still a vector.
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1,
        y_2: a_2 * x_2**2 + b_2 * x_2,
    })
    fit = Fit(model, **vector_data)
    self.assertFalse(model.shared_parameters)
    self.assertIsInstance(fit.fit, NumericalLeastSquares)

    # Scalar model, so it should use NumericalLeastSquares.
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1,
    })
    fit = Fit(model, x_1=xdata1, y_1=ydata1)
    self.assertFalse(model.shared_parameters)
    self.assertIsInstance(fit.fit, NumericalLeastSquares)
def test_full_eval_range(self):
    """
    Test if ODEModels can be evaluated at t < t_initial.

    A bit of a "no news is good news" test: the fits only have to run and
    reach a reasonable r squared.
    """
    tdata = np.array([0, 10, 26, 44, 70, 120])
    adata = 10e-4 * np.array([54, 44, 34, 27, 20, 14])
    a, b, t = variables('a, b, t')
    k, = parameters('k')
    k.value = 0.01

    # Start the integration from the third data point, so that part of
    # the data lies before the initial time.
    t0 = tdata[2]
    a0 = adata[2]
    b0 = 0.02729855  # Obtained from evaluating from t=0.

    model_dict = {
        D(a, t): - k * a**2,
        D(b, t): k * a**2,
    }

    ode_model = ODEModel(model_dict, initial={t: t0, a: a0, b: b0})
    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    ode_result = fit.execute()
    # assertGreater takes (first, second, msg); the stray third argument
    # ``4`` was only being used as the failure message and is removed.
    self.assertGreater(ode_result.r_squared, 0.95)

    # Now start from a timepoint that is not in the t-array such that it
    # triggers another pathway to be taken in integrating it.
    # Again, no news is good news.
    ode_model = ODEModel(model_dict, initial={t: t0 + 1e-5, a: a0, b: b0})
    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    ode_result = fit.execute()
    self.assertGreater(ode_result.r_squared, 0.95)
def test_harmonic_oscillator_errors(self):
    """
    Check that the errors obtained from fitting an ODE agree with those
    from fitting the exact solution.
    """
    x, v, t = sf.variables('x, v, t')
    k = sf.Parameter(name='k', value=100)
    m = 1

    # Harmonic oscillator as a first order system.
    a = -k/m * x
    equations = {sf.D(v, t): a, sf.D(x, t): v}
    ode_model = sf.ODEModel(equations, initial={t: 0, v: 0, x: 1})

    # Data: the ODE solution multiplied by noise centred around 1.
    t_data = np.linspace(0, 10, 250)
    np.random.seed(2)
    noise = np.random.normal(1, 0.05, size=t_data.shape)
    x_data = ode_model(t=t_data, k=100).x * noise

    ode_result = sf.Fit(ode_model, t=t_data, x=x_data).execute()

    # Exact solution with unit amplitude and zero phase.
    phi = 0
    A = 1
    model = sf.Model({x: A * sf.cos(sf.sqrt(k/m) * t + phi)})
    result = sf.Fit(model, t=t_data, x=x_data).execute()

    self.assertAlmostEqual(result.value(k), ode_result.value(k), places=4)
    self.assertAlmostEqual(result.stdev(k) / ode_result.stdev(k), 1, 2)
    self.assertGreaterEqual(result.stdev(k), ode_result.stdev(k))
def test_nonlinearfit(self):
    """
    Compare NonLinearLeastSquares with NumericalLeastSquares to see if
    parameter values and covariance matrices are consistent.
    """
    from symfit import Parameter

    t_data = np.array([1.4, 2.1, 2.6, 3.0, 3.3])
    y_data = np.array([10, 20, 30, 40, 50])

    # A different sigma for every point.
    sigma = 0.2
    n = np.array([5, 3, 8, 15, 30])
    sigma_t = sigma / np.sqrt(n)

    # We now define our model.
    t, y = variables('t, y')
    g = Parameter(9.0)
    t_model = {t: (2 * y / g)**0.5}

    fit = NonLinearLeastSquares(t_model, y=y_data, t=t_data, sigma_t=sigma_t)
    fit_result = fit.execute()

    fit = NumericalLeastSquares(t_model, y=y_data, t=t_data, sigma_t=sigma_t)
    num_result = fit.execute()

    self.assertAlmostEqual(num_result.value(g), fit_result.value(g))

    # Compare the covariance matrices element by element.
    for cov1, cov2 in zip(num_result.params.covariance_matrix.flatten(),
                          fit_result.params.covariance_matrix.flatten()):
        self.assertAlmostEqual(cov1, cov2)
def test_pickle(self):
    """
    Make sure models can be pickled and that their state is preserved by
    a pickle round trip.
    """
    a, b = parameters('a, b')
    x, y = variables('x, y')

    exact_model = Model({y: a * x ** b})
    constraint = Model.as_constraint(Eq(a, b), exact_model)
    num_model = CallableNumericalModel(
        {y: a * x ** b}, independent_vars=[x], params=[a, b]
    )
    connected_num_model = CallableNumericalModel(
        {y: a * x ** b}, connectivity_mapping={y: {x, a, b}}
    )
    # Test if lsoda args and kwargs are pickled too.
    ode_model = ODEModel({D(y, x): a * x + b}, {x: 0.0}, 3, 4,
                         some_kwarg=True)

    models = [exact_model, constraint, num_model, ode_model,
              connected_num_model]
    for model in models:
        new_model = pickle.loads(pickle.dumps(model))
        pair = (model, new_model)

        # Compare signatures.
        self.assertEqual(model.__signature__, new_model.__signature__)

        # Trigger the cached vars because we compare the `__dict__`s.
        for obj in pair:
            obj.vars

        # Explicitly make sure the connectivity mapping is identical.
        self.assertEqual(model.connectivity_mapping,
                         new_model.connectivity_mapping)

        if not isinstance(model, ODEModel):
            # Touch these attributes on both objects before the
            # `__dict__` comparison below.
            for obj in pair:
                obj.function_dict
                obj.vars_as_functions

        self.assertEqual(model.__dict__, new_model.__dict__)
def test_taylor_model(self):
    """
    Exercise ``TaylorModel``: its parameters, its expansion point ``p0``,
    equality between approximations, and linearity of the approximation.
    """
    a, b = parameters('a, b')
    x, y, z = variables('x, y, z')

    # Model which is already linear in its parameters.
    linear_model = Model({y: a * x + b})
    appr = TaylorModel(linear_model)
    self.assertEqual({a, b}, set(appr.params))
    appr.p0 = {a: 2.0, b: 5.0}
    expected_keys = set(appr.params_0[p] for p in appr.params)
    self.assertEqual(set(appr.p0.keys()), expected_keys)
    self.assertTrue(LinearLeastSquares.is_linear(appr))

    # Two approximations of the same model with a different p0.
    model = Model({z: a * x**2 + b * y**2})
    appr = TaylorModel(model)
    appr.p0 = {a: 2, b: 5}
    model = Model({z: a * x**2 + b * y**2})
    appr_2 = TaylorModel(model)
    appr_2.p0 = {a: 1, b: 1}
    self.assertTrue(appr == appr_2)

    # Models which are nonlinear in the parameters.
    model = Model({y: a * sympy.exp(x * b)})
    appr = TaylorModel(model)
    appr.p0 = {a: 2.0, b: 5.0}
    self.assertTrue(LinearLeastSquares.is_linear(appr))

    model = Model({y: sympy.sin(a * x)})
    appr = TaylorModel(model)
    appr.p0 = {a: 0.0}
    self.assertTrue(LinearLeastSquares.is_linear(appr))
def test_simple_kinetics(self):
    """
    Fit simple kinetics data with an ODEModel, using both
    ``NumericalLeastSquares`` and ``Fit``, and compare with stored values.
    """
    tdata = np.array([10, 26, 44, 70, 120])
    adata = 10e-4 * np.array([44, 34, 27, 20, 14])
    a, b, t = variables('a, b, t')
    k, a0 = parameters('k, a0')
    k.value = 0.01
    # a0.value, a0.min, a0.max = 54 * 10e-4, 40e-4, 60e-4
    # a0 is replaced by a plain number rather than being fitted.
    a0 = 54 * 10e-4

    model_dict = {
        D(a, t): - k * a**2,
        D(b, t): k * a**2,
    }
    ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, b: 0.0})

    # No data is available for b, hence b=None.
    fit = NumericalLeastSquares(ode_model, t=tdata, a=adata, b=None)
    fit_result = fit.execute()
    self.assertAlmostEqual(fit_result.value(k), 4.302875e-01, 4)
    self.assertAlmostEqual(fit_result.stdev(k), 6.447068e-03, 4)

    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    fit_result = fit.execute()
    self.assertAlmostEqual(fit_result.value(k), 4.302875e-01, 4)
    # This test expects no standard deviation estimate from `Fit` here.
    self.assertTrue(np.isnan(fit_result.stdev(k)))
def test_likelihood_fitting_exponential(self):
    """
    Fit an exponential distribution using the likelihood method.
    """
    b = Parameter(value=4, min=3.0)
    x, y = variables('x, y')
    pdf = {y: Exp(x, 1/b)}

    # Draw points from an Exp(5) exponential distribution.
    np.random.seed(100)
    xdata = np.random.exponential(5, 1000000)

    # Expected parameter values.
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev / np.sqrt(len(xdata))

    # Passing a sigma together with LogLikelihood has to be refused.
    self.assertRaises(
        NotImplementedError,
        Fit, pdf, x=xdata, sigma_y=2.0, objective=LogLikelihood
    )

    fit_result = Fit(pdf, xdata, objective=LogLikelihood).execute()
    fitted_b = fit_result.value(b)

    self.assertAlmostEqual(fitted_b / mean, 1, 3)
    self.assertAlmostEqual(fitted_b / stdev, 1, 3)
    self.assertAlmostEqual(fit_result.stdev(b) / mean_stdev, 1, 3)
def test_straight_line_analytical(self):
    """
    Test symfit against a straight line, for which the parameters and
    their uncertainties are known analytically. Assuming equal weights.

    The analytical values are compared with scipy's ``curve_fit``, with
    ``NumericalLeastSquares`` and with ``LinearLeastSquares``.
    """
    data = [[0, 1], [1, 0], [3, 2], [5, 4]]
    xdata, ydata = (np.array(i, dtype='float64') for i in zip(*data))

    # Closed form least squares solution for y = a * x + b.
    mean_squared_x = np.mean(xdata**2) - np.mean(xdata)**2
    mean_xy = np.mean(xdata * ydata) - np.mean(xdata)*np.mean(ydata)
    a = mean_xy/mean_squared_x
    b = ydata.mean() - a * xdata.mean()
    self.assertAlmostEqual(a, 0.694915, 6)  # values from Mathematica
    self.assertAlmostEqual(b, 0.186441, 6)

    # Variances of a and b computed from the sum of squared residuals.
    S = np.sum((ydata - (a*xdata + b))**2)
    var_a_exact = S/(len(xdata) * (len(xdata) - 2) * mean_squared_x)
    var_b_exact = var_a_exact*np.mean(xdata**2)
    a_exact = a
    b_exact = b

    # We will now compare these exact results with values from symfit,
    # numerically. Note that a and b are rebound to Parameters here.
    a, b = parameters('a, b')
    x, y = variables('x, y')
    model = {y: a*x + b}
    fit = NumericalLeastSquares(model, x=xdata, y=ydata)
    fit_result = fit.execute()

    popt, pcov = curve_fit(
        lambda z, c, d: c * z + d, xdata, ydata,
        jac=lambda z, c, d: np.transpose([xdata, np.ones_like(xdata)])
    )

    # curve_fit
    self.assertAlmostEqual(a_exact, popt[0], 4)
    self.assertAlmostEqual(b_exact, popt[1], 4)
    self.assertAlmostEqual(var_a_exact, pcov[0][0], 6)
    self.assertAlmostEqual(var_b_exact, pcov[1][1], 6)

    # NumericalLeastSquares
    self.assertAlmostEqual(a_exact, fit_result.value(a), 4)
    self.assertAlmostEqual(b_exact, fit_result.value(b), 4)
    self.assertAlmostEqual(var_a_exact, fit_result.variance(a), 6)
    self.assertAlmostEqual(var_b_exact, fit_result.variance(b), 6)

    # Do the fit with the LinearLeastSquares object.
    fit = LinearLeastSquares(model, x=xdata, y=ydata)
    fit_result = fit.execute()
    self.assertAlmostEqual(a_exact, fit_result.value(a), 4)
    self.assertAlmostEqual(b_exact, fit_result.value(b), 4)
    self.assertAlmostEqual(var_a_exact, fit_result.variance(a), 6)
    self.assertAlmostEqual(var_b_exact, fit_result.variance(b), 6)

    # Let's also make sure the entire covariance matrix is the same.
    for cov1, cov2 in zip(fit_result.params.covariance_matrix.flatten(),
                          pcov.flatten()):
        self.assertAlmostEqual(cov1, cov2)
def test_data_for_constraint(self):
    """
    Test the signature handling when constraints are involved.

    Constraints take separate data, but keyword arguments that are found
    in neither the model nor the constraints should still raise an error.
    """
    A, mu, sig = parameters('A, mu, sig')
    x, y, Y = variables('x, y, Y')

    model = Model({y: A * Gaussian(x, mu=mu, sig=sig)})
    constraint = Model.as_constraint(Y, model, constraint_type=Eq)

    np.random.seed(2)
    xdata = np.random.normal(1.2, 2, 10)
    n_bins = int(np.sqrt(len(xdata)))
    ydata, _ = np.histogram(xdata, bins=n_bins, density=True)

    # Allowed: these constructions must not raise.
    Fit(model, x=xdata, y=ydata, Y=2, constraints=[constraint])
    Fit(model, x=xdata, y=ydata)
    Fit(model, x=xdata, objective=LogLikelihood)

    # Not allowed: data for Y without a constraint that uses it.
    self.assertRaises(TypeError, Fit, model, x=xdata, y=ydata, Y=2)
    # Not allowed: Z appears in neither the model nor the constraint.
    self.assertRaises(
        TypeError,
        Fit, model, x=xdata, y=ydata, Y=2, Z=3, constraints=[constraint]
    )
def test_vector_fitting_guess(self):
    """
    Fit a three-component vector valued function with initial guesses
    for two of the parameters.
    """
    a, b, c = parameters('a, b, c')
    a.value = 10
    b.value = 100
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit = NumericalLeastSquares(
        model=model, a_i=xdata[0], b_i=xdata[1], c_i=xdata[2],
    )
    fit_result = fit.execute()

    # Every parameter is expected to come out as the mean of its data row.
    for param, row in zip((a, b, c), xdata):
        self.assertAlmostEqual(fit_result.value(param), np.mean(row), 4)
def test_interdependency_constrained(self):
    """
    Test a model with interdependent components, written in the
    MatrixSymbol formalism with a Tikhonov regularization as an example.

    A matrix inverse has to be calculated and is used multiple times.
    Therefore that term is split off into a separate component, so the
    inverse only has to be computed once per model call. See
    https://arxiv.org/abs/1901.05348 for a more detailed background.
    """
    N = Symbol('N', integer=True)
    M = MatrixSymbol('M', N, N)
    W = MatrixSymbol('W', N, N)
    I = MatrixSymbol('I', N, N)
    y = MatrixSymbol('y', N, 1)
    c = MatrixSymbol('c', N, 1)
    a, = parameters('a')
    z, = variables('z')

    model_dict = {
        W: Inverse(I + M / a ** 2),
        c: - W * y,
        z: sqrt(c.T * c)
    }
    # Sympy currently does not support derivatives of matrix expressions,
    # so we use CallableModel instead of Model.
    model = CallableModel(model_dict)

    # Generate data.
    iden = np.eye(2)
    M_mat = np.array([[2, 1], [3, 4]])
    y_vec = np.array([[3], [5]])
    eval_model = model(I=iden, M=M_mat, y=y_vec, a=0.1)

    # Calculate the answers 'manually' so we know it was done properly.
    W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2)
    c_manual = - np.atleast_2d(W_manual.dot(y_vec))
    z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual)))

    self.assertEqual(y_vec.shape, (2, 1))
    self.assertEqual(M_mat.shape, (2, 2))
    self.assertEqual(iden.shape, (2, 2))
    self.assertEqual(W_manual.shape, (2, 2))
    self.assertEqual(c_manual.shape, (2, 1))
    self.assertEqual(z_manual.shape, (1, 1))
    np.testing.assert_almost_equal(W_manual, eval_model.W)
    np.testing.assert_almost_equal(c_manual, eval_model.c)
    np.testing.assert_almost_equal(z_manual, eval_model.z)

    fit = Fit(model, z=z_manual, I=iden, M=M_mat, y=y_vec)
    fit_result = fit.execute()

    # See if a == 0.1 was reconstructed properly. Since only a**2 features
    # in the equations, we check for the absolute value. Setting
    # a.min = 0.0 is not appreciated by the Minimizer, it seems.
    self.assertAlmostEqual(np.abs(fit_result.value(a)), 0.1)
def test_jacobian_matrix(self):
    """
    The jacobian of a model should be a 2D list (matrix) holding all the
    partial derivatives with respect to the parameters.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = Model({
        a_i: 2 * a + 3 * b,
        b_i: 5 * b,
        c_i: 7 * c,
    })

    # One row per component, one column per parameter (a, b, c).
    expected_jacobian = [
        [2, 3, 0],
        [0, 5, 0],
        [0, 0, 7],
    ]
    self.assertEqual(expected_jacobian, model.jacobian)
def test_global_fitting(self):
    """
    Test a global fitting scenario with datasets of unequal length.

    Two quadratic equations are fitted whose constant term ``y0`` is
    shared between the datasets (e.g. identical background noise).
    """
    x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
    y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')

    # The shared y0 links the two components of this vector valued model.
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
    })

    # Generate data from this model.
    xdata1 = np.linspace(0, 10)
    xdata2 = xdata1[::2]  # Make the sets of unequal size.
    true_values = dict(a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
    ydata1, ydata2 = model(x_1=xdata1, x_2=xdata2, **true_values)

    # Add some noise to make it look like real data.
    np.random.seed(1)
    ydata1 += np.random.normal(0, 2, size=ydata1.shape)
    ydata2 += np.random.normal(0, 2, size=ydata2.shape)

    # Initial guesses.
    a_1.value = 100
    a_2.value = 50
    b_1.value = 1
    b_2.value = 1
    y0.value = 10

    # The evaluated jacobian has one entry per component, each of which
    # has one entry per parameter.
    eval_jac = model.eval_jacobian(x_1=xdata1, x_2=xdata2, **true_values)
    self.assertEqual(len(eval_jac), 2)
    for comp in eval_jac:
        self.assertEqual(len(comp), len(model.params))

    # Standard deviations for the second dataset only.
    sigma_y = np.concatenate((np.ones(20), [2., 4., 5, 7, 3]))

    fit = Fit(model, x_1=xdata1, x_2=xdata2, y_1=ydata1, y_2=ydata2,
              sigma_y_2=sigma_y)
    fit_result = fit.execute()

    reference_values = (
        (y0, 1.061892e+01),
        (a_1, 1.013269e+02),
        (a_2, 5.625694e+01),
        (b_1, 3.362240e-01),
        (b_2, 1.565253e+00),
    )
    for param, expected in reference_values:
        self.assertAlmostEqual(fit_result.value(param), expected, 3)
def test_order(self):
    """
    The model has to behave like an OrderedDict: its dependent variables
    must match the list of its keys. This is of the utmost importance!
    """
    x, y_1, y_2 = variables('x, y_1, y_2')
    a, b = parameters('a, b')

    # y_2 is deliberately listed before y_1.
    model = Model({
        y_2: a * x**2,
        y_1: 2 * x * b,
    })

    key_order = list(model.keys())
    self.assertEqual(model.dependent_vars, key_order)
def test_weights(self):
    """
    Compare NumericalLeastSquares with LinearLeastSquares to see if errors
    are implemented consistently, and check the effect of
    ``absolute_sigma`` on the covariance matrix.
    """
    from symfit import Parameter

    t_data = np.array([1.4, 2.1, 2.6, 3.0, 3.3])
    y_data = np.array([10, 20, 30, 40, 50])

    # A different sigma for every point.
    sigma = 0.2
    n = np.array([5, 3, 8, 15, 30])
    sigma_t = sigma / np.sqrt(n)

    # We now define our model.
    t, y = variables("t, y")
    b = Parameter()
    # sqrt_g_inv = sqrt(1/g). Currently needed to linearize the model
    # t = (2 * y / g)**0.5.
    sqrt_g_inv = Parameter()
    t_model = {t: 2 * y ** 0.5 * sqrt_g_inv + b}

    fit = NumericalLeastSquares(t_model, y=y_data, t=t_data,
                                sigma_t=sigma_t, absolute_sigma=False)
    num_result_rel = fit.execute()

    fit = NumericalLeastSquares(t_model, y=y_data, t=t_data,
                                sigma_t=sigma_t, absolute_sigma=True)
    num_result = fit.execute()

    # Scale factor used to make the absolute covariance relative. It does
    # not depend on the matrix element, so it is computed once, outside
    # the loop.
    ss_res = np.sum(num_result_rel.infodict["fvec"] ** 2)
    n_points = len(fit.data[fit.model.dependent_vars[0].name])
    degrees_of_freedom = n_points - len(fit.model.params)
    s_sq = ss_res / degrees_of_freedom

    # The cov matrices should now differ by exactly this factor.
    for cov1, cov2 in zip(
        num_result_rel.params.covariance_matrix.flatten(),
        num_result.params.covariance_matrix.flatten()
    ):
        self.assertAlmostEqual(cov1, cov2 * s_sq)

    fit = LinearLeastSquares(t_model, y=y_data, t=t_data, sigma_t=sigma_t)
    fit_result = fit.execute()

    self.assertAlmostEqual(num_result.value(sqrt_g_inv),
                           fit_result.value(sqrt_g_inv))
    self.assertAlmostEqual(num_result.value(b), fit_result.value(b))

    for cov1, cov2 in zip(
        num_result.params.covariance_matrix.flatten(),
        fit_result.params.covariance_matrix.flatten()
    ):
        self.assertAlmostEqual(cov1, cov2)
def test_hessian_matrix(self):
    """
    The Hessian of a model should be a 3D list (matrix) holding all the
    second partial derivatives with respect to the parameters.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = Model({
        a_i: 2 * a**2 + 3 * b,
        b_i: 5 * b**2,
        c_i: 7 * c*b,
    })

    # One 3x3 matrix per component, in the order a_i, b_i, c_i.
    hessian_a_i = [[4, 0, 0], [0, 0, 0], [0, 0, 0]]
    hessian_b_i = [[0, 0, 0], [0, 10, 0], [0, 0, 0]]
    hessian_c_i = [[0, 0, 0], [0, 0, 7], [0, 7, 0]]

    self.assertEqual([hessian_a_i, hessian_b_i, hessian_c_i], model.hessian)
def test_van_der_pol(self):
    """
    Build the Van der Pol oscillator as an ODEModel.

    Only the construction of the model is exercised here. The system is
    taken from
    http://hplgit.github.io/odespy/doc/pub/tutorial/html/main_odespy.html
    """
    u_0, u_1, t = variables('u_0, u_1, t')

    # Van der Pol as a first order system with mu = 3:
    #   u_0' = u_1
    #   u_1' = mu * (1 - u_0**2) * u_1 - u_0
    # The restoring term is -u_0; the previous version subtracted u_1.
    model_dict = {
        D(u_0, t): u_1,
        D(u_1, t): 3 * (1 - u_0**2) * u_1 - u_0
    }

    ode_model = ODEModel(model_dict, initial={t: 0.0, u_0: 2.0, u_1: 1.0})
def test_linear_analytical_fit(self):
    """
    Fit a straight line with ``LinearLeastSquares`` and compare the
    parameters with reference values.
    """
    a, b = parameters('a, b')
    x, y = variables('x, y')
    model = {y: a * x + b}

    # Split the (x, y) pairs into separate float arrays.
    data = [[0, 1], [1, 0], [3, 2], [5, 4]]
    xdata, ydata = (np.array(column, dtype='float64')
                    for column in zip(*data))

    fit_result = LinearLeastSquares(model, x=xdata, y=ydata).execute()

    # Reference values from Mathematica.
    for param, expected in ((a, 0.694915), (b, 0.186441)):
        self.assertAlmostEqual(fit_result.value(param), expected, 6)
def test_unequal_data(self):
    """
    Test to make sure finite differences work with data of unequal
    length, by comparing them with the exact jacobian.
    """
    x_1, x_2, y_1, y_2 = sf.variables('x_1, x_2, y_1, y_2')
    y0, a_1, a_2, b_1, b_2 = sf.parameters('y0, a_1, a_2, b_1, b_2')

    # Vector model in which both components share y0.
    model = sf.Model({
        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
    })

    # Generate data from this model.
    xdata1 = np.linspace(0, 10)
    xdata2 = xdata1[::2]  # Only every other point.

    call_kwargs = dict(x_1=xdata1, x_2=xdata2,
                       a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
    exact = model.eval_jacobian(**call_kwargs)
    approx = model.finite_difference(**call_kwargs)

    # First axis is the number of components.
    self.assertEqual(len(exact), 2)
    self.assertEqual(len(approx), 2)

    # Second axis is the number of parameters, same for all components.
    n_params = len(model.params)
    for exact_comp, approx_comp, xdata in zip(exact, approx,
                                              [xdata1, xdata2]):
        self.assertEqual(len(exact_comp), n_params)
        self.assertEqual(len(approx_comp), n_params)
        for exact_elem, approx_elem in zip(exact_comp, approx_comp):
            self.assertEqual(exact_elem.shape, xdata.shape)
            self.assertEqual(approx_elem.shape, xdata.shape)

    self._assert_equal(exact, approx, rtol=1e-4)

    # Vector model without a shared parameter.
    model = sf.Model({
        y_1: a_1 * x_1**2 + b_1 * x_1,
        y_2: a_2 * x_2**2 + b_2 * x_2,
    })
    call_kwargs = dict(x_1=xdata1, x_2=xdata2,
                       a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111)
    exact = model.eval_jacobian(**call_kwargs)
    approx = model.finite_difference(**call_kwargs)
    self._assert_equal(exact, approx, rtol=1e-4)

    # Scalar model.
    model = sf.Model({
        y_1: a_1 * x_1**2 + b_1 * x_1,
    })
    call_kwargs = dict(x_1=xdata1, a_1=101.3, b_1=0.5)
    exact = model.eval_jacobian(**call_kwargs)
    approx = model.finite_difference(**call_kwargs)
    self._assert_equal(exact, approx, rtol=1e-4)
def test_1_multi_model(self):
    '''Tests the case with 1 component and multiple parameters'''
    x, y = sf.variables('x, y')
    a, b = sf.parameters('a, b')
    model = sf.Model({y: 3 * a * x**2 - sf.exp(b) * x})

    # First evaluate at an array of x values, then at a single x value.
    for x_input in (np.arange(10), 3):
        exact = model.eval_jacobian(x=x_input, a=3.5, b=2)
        approx = model.finite_difference(x=x_input, a=3.5, b=2)
        np.testing.assert_allclose(exact, approx)
def test_MatrixSymbolModel(self):
    """
    Test a model which is defined by MatrixSymbols, see #194.
    """
    N = Symbol('N', integer=True)
    M = MatrixSymbol('M', N, N)
    W = MatrixSymbol('W', N, N)
    I = MatrixSymbol('I', N, N)
    y = MatrixSymbol('y', N, 1)
    c = MatrixSymbol('c', N, 1)
    a, b = parameters('a, b')
    z, x = variables('z, x')

    # TODO: This should be a Model in the future, but sympy is not yet
    # capable of computing Matrix derivatives at the time of writing.
    model = CallableModel({
        W: Inverse(I + M / a ** 2),
        c: - W * y,
        z: sqrt(c.T * c)
    })

    # Check how the symbols have been classified by the model.
    self.assertEqual(model.params, [a])
    self.assertEqual(model.independent_vars, [I, M, y])
    self.assertEqual(model.dependent_vars, [z])
    self.assertEqual(model.interdependent_vars, [W, c])
    expected_connectivity = {W: {I, M, a}, c: {W, y}, z: {c}}
    self.assertEqual(model.connectivity_mapping, expected_connectivity)

    # Generate data.
    iden = np.eye(2)
    M_mat = np.array([[2, 1], [3, 4]])
    y_vec = np.array([3, 5])
    fixed_inputs = dict(I=iden, M=M_mat, y=y_vec)

    eval_model = model(a=0.1, **fixed_inputs)

    # The same quantities computed directly with numpy.
    W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2)
    c_manual = - W_manual.dot(y_vec)
    z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual)))

    np.testing.assert_allclose(eval_model.W, W_manual)
    np.testing.assert_allclose(eval_model.c, c_manual)
    np.testing.assert_allclose(eval_model.z, z_manual)

    # Now try to retrieve the value of `a` from a fit.
    a.value = 0.2
    fit = Fit(model, z=z_manual, **fixed_inputs)
    fit_result = fit.execute()
    eval_model = model(I=iden, M=M_mat, y=y_vec, **fit_result.params)

    # Only the absolute value of a is compared.
    self.assertAlmostEqual(0.1, np.abs(fit_result.value(a)))
    np.testing.assert_allclose(eval_model.W, W_manual, rtol=1e-5)
    np.testing.assert_allclose(eval_model.c, c_manual, rtol=1e-5)
    np.testing.assert_allclose(eval_model.z, z_manual, rtol=1e-5)
def test_model_from_dict(self):
    """
    Try to create a model from a dictionary; any exception raised while
    doing so is reported as a test failure.
    """
    x, y_1, y_2 = variables('x, y_1, y_2')
    a, b = parameters('a, b')

    # Catching the exception makes the test fail rather than error.
    try:
        Model({
            y_1: 2 * a * x,
            y_2: b * x**2
        })
    except Exception as error:
        message = 'test_model_from_dict raised {}'.format(error)
        self.fail(message)
def test_named_fitting(self):
    """
    Fit ``y = a * x**b`` to exact data generated from ``3 * x**2``, with
    the data passed as keyword arguments.
    """
    xdata = np.linspace(1, 10, 10)
    ydata = 3*xdata**2

    a = Parameter(1.0)
    b = Parameter(2.5)
    x, y = variables('x, y')
    model = {y: a*x**b}

    fit_result = ConstrainedNumericalLeastSquares(
        model, x=xdata, y=ydata
    ).execute()

    self.assertIsInstance(fit_result, FitResults)
    self.assertAlmostEqual(fit_result.value(a), 3.0, 3)
    self.assertAlmostEqual(fit_result.value(b), 2.0, 4)
from symfit import parameters, variables, Fit, Piecewise, exp, Eq, Model
import numpy as np
import matplotlib.pyplot as plt

t, y = variables('t, y')
a, b, d, k, t0 = parameters('a, b, d, k, t0')

# Piecewise model: a straight line up to t0, an exponential decay after it.
y1 = a * t + b
y2 = d * exp(-k * t)
piecewise_expr = Piecewise((y1, t <= t0), (y2, t > t0))
model = Model({y: piecewise_expr})

# Constraint: the derivatives of both pieces must be equal at t0. To express
# this, differentiate with respect to t, substitute t -> t0 and demand
# equality using `Eq`.
# NOTE(review): only the derivatives are constrained here, not the function
# values themselves - confirm that this is intended.
slope_left = y1.diff(t).subs({t: t0})
slope_right = y2.diff(t).subs({t: t0})
constraints = [Eq(slope_left, slope_right)]

# Generate example data with 5% noise.
tdata = np.linspace(0, 4., 200)
noise_free = model(t=tdata, a=63, b=300, d=2205, k=3, t0=0.65).y
ydata = np.random.normal(noise_free, 0.05 * noise_free)

# Help the fit by bounding the switchpoint between the pieces and by giving
# an initial guess for b.
t0.min = 0.5
t0.max = 0.8
b.value = 320

fit = Fit(model, t=tdata, y=ydata, constraints=constraints)
fit_result = fit.execute()
print(fit_result)
def test_vector_fitting(self):
    """
    Test which backend ``Fit`` selects in the presence of bounds or
    constraints.

    ``Fit`` should select ``ConstrainedNumericalLeastSquares`` when bounds
    or constraints are provided, or for vector models in general. For
    plain scalar models ``NumericalLeastSquares`` is expected.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    def apply_bounds():
        # Bounds and initial guesses for a and b.
        a.min = 0
        a.max = 25
        a.value = 10
        b.min = 80
        b.max = 120
        b.value = 100

    # Make a new scalar model.
    scalar_model = {a_i: a + b}

    simple_fit = Fit(model=scalar_model, a_i=xdata[0])
    self.assertIsInstance(simple_fit.fit, NumericalLeastSquares)

    constrained_fit = Fit(
        model=scalar_model, a_i=xdata[0],
        constraints=[Equality(a + b, 110)]
    )
    self.assertIsInstance(constrained_fit.fit,
                          ConstrainedNumericalLeastSquares)

    apply_bounds()
    bound_fit = Fit(model=scalar_model, a_i=xdata[0])
    self.assertIsInstance(bound_fit.fit, ConstrainedNumericalLeastSquares)

    # Repeat all of the above for the vector model.
    vector_data = dict(a_i=xdata[0], b_i=xdata[1], c_i=xdata[2])

    simple_fit = Fit(model=model, **vector_data)
    self.assertIsInstance(simple_fit.fit, ConstrainedNumericalLeastSquares)

    constrained_fit = Fit(
        model=model, constraints=[Equality(a + b + c, 180)], **vector_data
    )
    self.assertIsInstance(constrained_fit.fit,
                          ConstrainedNumericalLeastSquares)

    apply_bounds()
    bound_fit = Fit(model=model, **vector_data)
    self.assertIsInstance(bound_fit.fit, ConstrainedNumericalLeastSquares)

    # The bounded vector fit should reproduce the mean of each data row.
    fit_result = bound_fit.execute()
    self.assertAlmostEqual(fit_result.params.a, np.mean(xdata[0]), 6)
    self.assertAlmostEqual(fit_result.params.b, np.mean(xdata[1]), 6)
    self.assertAlmostEqual(fit_result.params.c, np.mean(xdata[2]), 6)
def test_vector_parameter_error():
    """
    Tests `Fit` parameter error estimation with vector models.

    The classic "angles of a triangle" example is used, with covariance
    between the angles thrown in for completeness.

    As per 0.5.0 this test has been updated in an important way. Previously
    the covariance matrix was estimated on a per component basis for global
    fitting problems. This was incorrect, but no solution was possible at the
    time. Now the covariance matrix is calculated from the Hessian of the
    function being optimized, so it is calculated correctly in those
    scenarios.

    As a result, for this particular test we lose sensitivity to the error
    of each parameter separately. This makes sense, since the uncertainty is
    now being spread out over the components. To regain it, the user should
    fit the components separately.
    """
    N = 10000
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    model = {a_i: a, b_i: b, c_i: c}
    fit_params = (a, b, c)

    np.random.seed(1)
    # Sample from a multivariate normal with correlation.
    pcov = np.array([[0.4, 0.3, 0.5], [0.3, 0.8, 0.4], [0.5, 0.4, 1.2]])
    xdata = np.random.multivariate_normal([10, 100, 70], pcov, N).T
    data = dict(a_i=xdata[0], b_i=xdata[1], c_i=xdata[2])
    # Analytical standard deviation of the mean, per component.
    analytical_stdev = [np.sqrt(pcov[i, i] / N) for i in range(3)]

    fit = Fit(model=model, **data)
    fit_std = Fit(model=model, minimizer=MINPACK, **data)
    fit_new_result = fit.execute()
    std_result = fit_std.execute()

    # When no errors are given, we default to `absolute_sigma=False`, since
    # that is the best we can do.
    assert not fit.absolute_sigma
    assert not fit_std.absolute_sigma

    # The default minimizer and MINPACK should give roughly the same
    # parameter values.
    for param in fit_params:
        assert fit_new_result.value(param) == pytest.approx(
            std_result.value(param), rel=1e-3)

    # In this toy model, fitting is identical to simply taking the average.
    for param, sample in zip(fit_params, xdata):
        assert fit_new_result.value(param) == pytest.approx(
            np.mean(sample), rel=1e-4)

    # All stdev's must be equal.
    for other in (b, c):
        assert fit_new_result.stdev(a) == pytest.approx(
            fit_new_result.stdev(other), rel=1e-3)

    # Test for a miss on the exact (analytical) value.
    for param, expected in zip(fit_params, analytical_stdev):
        assert not fit_new_result.stdev(param) == pytest.approx(
            expected, rel=1e-3)

    # With the correct values of sigma, absolute_sigma=True should be in
    # agreement with the analytical result.
    sigmadata = np.sqrt(np.diag(pcov))
    fit = Fit(
        model=model,
        sigma_a_i=sigmadata[0],
        sigma_b_i=sigmadata[1],
        sigma_c_i=sigmadata[2],
        **data
    )
    assert fit.absolute_sigma
    fit_result = fit.execute()
    # The standard deviation in the mean is stdev/sqrt(N),
    # see test_param_error_analytical
    for param, expected in zip(fit_params, analytical_stdev):
        assert fit_result.stdev(param) / expected == pytest.approx(
            1.0, rel=1e-4)

    # Finally, we should confirm that with unrealistic sigma and
    # absolute_sigma=True, we are no longer in agreement with the analytical
    # result. Let's take everything to be 1 to point out the dangers of doing
    # so.
    sigmadata = np.array([1, 1, 1])
    fit2 = Fit(
        model=model,
        sigma_a_i=sigmadata[0],
        sigma_b_i=sigmadata[1],
        sigma_c_i=sigmadata[2],
        absolute_sigma=True,
        **data
    )
    fit_result = fit2.execute()
    # Should be off bigly. The tolerance used for c is deliberately tighter
    # than the one used for a and b.
    for param, expected, tolerance in zip(fit_params, analytical_stdev,
                                          (1e-1, 1e-1, 1e-5)):
        assert not fit_result.stdev(param) / expected == pytest.approx(
            1.0, rel=tolerance)
def fit_lanes(self, points_left, points_right, fit_globally=False) -> dict:
    """
    Fit a second order polynomial to the left and the right lane line.

    Both lanes are described by x(y) = ay^2 + by + x0. In the
    `fit_globally` case, a and b are shared between the lanes, making the
    lines perfectly parallel. Otherwise, each line is fit independently of
    the other.

    :param points_left: Two lists of the x and y positions along the left
        lane line.
    :param points_right: Two lists of the x and y positions along the right
        lane line.
    :param fit_globally: Set True to use the global, parallel line fit
        model. In practice this does not always work.
    :return: fit_vals, a dictionary with keys 'al', 'bl', 'x0l' for the left
        lane parameters and 'ar', 'br', 'x0r' for the right lane.
    """
    xl, yl = points_left
    xr, yr = points_right

    if not fit_globally:
        # Fit lines independently, reusing one single-lane model.
        x, y = symfit.variables('x, y')
        a, b, x0 = symfit.parameters('a, b, x0')
        lane_model = symfit.Model({x: a * y**2 + b * y + x0})

        left = symfit.Fit(lane_model, x=xl, y=yl).execute()
        fit_vals = {
            'al': left.value(a),
            'bl': left.value(b),
            'x0l': left.value(x0),
        }

        right = symfit.Fit(lane_model, x=xr, y=yr).execute()
        fit_vals['ar'] = right.value(a)
        fit_vals['br'] = right.value(b)
        fit_vals['x0r'] = right.value(x0)
        return fit_vals

    # Global model: curvature `a` and slope `b` are shared, only the offsets
    # differ between the two lanes.
    x_left, y_left, x_right, y_right = symfit.variables(
        'x_left, y_left, x_right, y_right')
    a, b, x0_left, x0_right = symfit.parameters('a, b, x0_left, x0_right')
    parallel_model = symfit.Model({
        x_left: a * y_left**2 + b * y_left + x0_left,
        x_right: a * y_right**2 + b * y_right + x0_right,
    })

    result = symfit.Fit(parallel_model, x_left=xl, y_left=yl,
                        x_right=xr, y_right=yr).execute()
    shared_a = result.value(a)
    shared_b = result.value(b)
    return {
        'ar': shared_a,
        'al': shared_a,
        'bl': shared_b,
        'br': shared_b,
        'x0l': result.value(x0_left),
        'x0r': result.value(x0_right),
    }
import seaborn as sns

palette = sns.color_palette()

# First order reaction kinetics. Data taken from
# http://chem.libretexts.org/Core/Physical_Chemistry/Kinetics/Rate_Laws/The_Rate_Law
tdata = np.array([
    0, 0.9184, 9.0875, 11.2485, 17.5255, 23.9993, 27.7949, 31.9783, 35.2118,
    42.973, 46.6555, 50.3922, 55.4747, 61.827, 65.6603, 70.0939
])
concentration = np.array([
    0.906, 0.8739, 0.5622, 0.5156, 0.3718, 0.2702, 0.2238, 0.1761, 0.1495,
    0.1029, 0.086, 0.0697, 0.0546, 0.0393, 0.0324, 0.026
])

# Define our ODE model: dA/dt = -k * A, starting from the first data point.
A, t = variables('A, t')
k = Parameter()
model = ODEModel({D(A, t): -k * A},
                 initial={t: tdata[0], A: concentration[0]})

fit = Fit(model, A=concentration, t=tdata)
fit_result = fit.execute()
print(fit_result)

# Plotting, irrelevant to the symfit part.
t_axis = np.linspace(np.min(tdata), np.max(tdata))
y, = model(t=t_axis, **fit_result.params)
# Pass the data by keyword: recent seaborn releases only accept `x` and `y`
# as keyword arguments, and older releases accept the keywords as well.
sns.regplot(x=tdata, y=concentration, fit_reg=False)
plt.plot(t_axis, y, color=palette[2])
plt.xlabel('t /min')
plt.ylabel('[A] /M')
def test_global_fitting():
    """
    Test a global fitting scenario with datasets of unequal length.

    Two quadratic equations are fitted simultaneously, where the constant
    term is shared between the datasets (e.g. identical background noise).
    """
    x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
    y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')

    # The following vector valued function links the two equations together
    # through the shared offset y0.
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
    })

    # Generate data from this model.
    xdata1 = np.linspace(0, 10)
    xdata2 = xdata1[::2]  # Make the sets of unequal size
    truth = dict(a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
    ydata1, ydata2 = model(x_1=xdata1, x_2=xdata2, **truth)

    # Add some noise to make it appear like real data.
    np.random.seed(1)
    ydata1 += np.random.normal(0, 2, size=ydata1.shape)
    ydata2 += np.random.normal(0, 2, size=ydata2.shape)

    # Guesses
    a_1.value = 100
    a_2.value = 50
    b_1.value = 1
    b_2.value = 1
    y0.value = 10

    # The Jacobian has one entry per component, each with one entry per
    # parameter.
    eval_jac = model.eval_jacobian(x_1=xdata1, x_2=xdata2, **truth)
    assert len(eval_jac) == 2
    for component in eval_jac:
        assert len(component) == len(model.params)

    # Only the second dataset gets explicit uncertainties.
    sigma_y = np.concatenate((np.ones(20), [2., 4., 5, 7, 3]))
    fit = Fit(model, x_1=xdata1, x_2=xdata2, y_1=ydata1, y_2=ydata2,
              sigma_y_2=sigma_y)
    fit_result = fit.execute()

    expected_values = (
        (y0, 1.061892e+01),
        (a_1, 1.013269e+02),
        (a_2, 5.625694e+01),
        (b_1, 3.362240e-01),
        (b_2, 1.565253e+00),
    )
    for param, expected in expected_values:
        assert fit_result.value(param) == pytest.approx(expected, 1e-03)
def test_pickle():
    """
    Test the picklability of the different minimizers.

    For every minimizer a `Fit` is built, its minimizer is pickled and
    unpickled, and the attributes of the round-tripped object are compared
    with the original. Finally both are executed with the same random seed
    and should give identical results.
    """
    # Create test data
    xdata = np.linspace(0, 100, 100)  # From 0 to 100 in 100 steps
    a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape)
    b_vec = np.random.normal(100, scale=2.0, size=xdata.shape)
    ydata = a_vec * xdata + b_vec  # Points scattered around 15 * x + 100

    # Normal symbolic fit
    a = Parameter('a', value=0, min=0.0, max=1000)
    b = Parameter('b', value=0, min=0.0, max=1000)
    x, y = variables('x, y')

    # Make a set of all ScipyMinimizers, and add a chained minimizer.
    scipy_minimizers = list(subclasses(ScipyMinimize))
    chained_minimizer = (DifferentialEvolution, BFGS)
    scipy_minimizers.append(chained_minimizer)
    constrained_minimizers = subclasses(ScipyConstrainedMinimize)

    # Attributes which are new instances after unpickling and therefore do
    # not pass a plain equality test.
    problematic_attr = [
        'objective', '_pickle_kwargs', 'wrapped_objective', 'constraints',
        'wrapped_constraints', 'local_minimizer', 'minimizers'
    ]

    # Test for all of them if they can be pickled.
    for minimizer in scipy_minimizers:
        if minimizer in constrained_minimizers:
            constraints = [Ge(b, a)]
        else:
            constraints = []
        model = CallableNumericalModel(
            {y: f},
            connectivity_mapping={y: {x, a, b}},
        )
        fit = Fit(model, x=xdata, y=ydata, minimizer=minimizer,
                  constraints=constraints)
        if minimizer is not MINPACK:
            assert isinstance(fit.objective, LeastSquares)
            assert isinstance(fit.minimizer.objective, LeastSquares)
        else:
            assert isinstance(fit.objective, VectorLeastSquares)
            assert isinstance(fit.minimizer.objective, VectorLeastSquares)

        fit = fit.minimizer  # Just check if the minimizer pickles
        dump = pickle.dumps(fit)
        pickled_fit = pickle.loads(dump)

        for key, value in fit.__dict__.items():
            new_value = pickled_fit.__dict__[key]
            try:
                assert value == new_value
            except AssertionError as err:
                if key not in problematic_attr:
                    raise err
                # These attr are new instances, and therefore do not
                # pass an equality test. All we can do is see if they
                # are at least the same type.
                if isinstance(value, (list, tuple)):
                    for val1, val2 in zip(value, new_value):
                        assert isinstance(val1, val2.__class__)
                        if key == 'constraints':
                            assert val1.model.constraint_type == val2.model.constraint_type
                            assert list(val1.model.model_dict.values())[0] == list(val2.model.model_dict.values())[0]
                            assert val1.model.independent_vars == val2.model.independent_vars
                            assert val1.model.params == val2.model.params
                            assert val1.model.__signature__ == val2.model.__signature__
                        elif key == 'wrapped_constraints':
                            if isinstance(val1, dict):
                                assert val1['type'] == val2['type']
                                assert set(val1.keys()) == set(val2.keys())
                            elif isinstance(val1, NonlinearConstraint):
                                # For trust-ncg we manually check if
                                # their dicts are equal, because no
                                # __eq__ is implemented on
                                # NonLinearConstraint
                                assert len(val1.__dict__) == len(val2.__dict__)
                                # Use a separate loop variable here: reusing
                                # `key` would overwrite the attribute name of
                                # the outer loop, so any further constraint
                                # in this list would silently skip the checks
                                # above.
                                for attr_name in val1.__dict__:
                                    try:
                                        assert val1.__dict__[attr_name] == val2.__dict__[attr_name]
                                    except AssertionError:
                                        assert isinstance(val1.__dict__[attr_name], val2.__dict__[attr_name].__class__)
                            else:
                                raise NotImplementedError('No such constraint type is known.')
                elif key == '_pickle_kwargs':
                    # NOTE(review): the result of this comparison is not
                    # asserted, so it can never fail the test. Left as-is;
                    # confirm whether an `assert` was intended.
                    FitResults._array_safe_dict_eq(value, new_value)
                else:
                    assert isinstance(new_value, value.__class__)
        assert set(fit.__dict__.keys()) == set(pickled_fit.__dict__.keys())

        # Test if we converge to the same result.
        np.random.seed(2)
        res_before = fit.execute()
        np.random.seed(2)
        res_after = pickled_fit.execute()
        assert FitResults._array_safe_dict_eq(res_before.__dict__, res_after.__dict__)
def test_vector_parameter_error(self):
    """
    Tests `ConstrainedNumericalLeastSquares` parameter error estimation with
    vector models.

    The classic "angles of a triangle" example is used, with covariance
    between the angles thrown in for completeness.

    As it stands now, `ConstrainedNumericalLeastSquares` is able to
    correctly predict the values of the parameters and their standard
    deviations, but it is not able to give the covariances. Those are
    therefore returned as nan, to prevent people from citing them as 0.0.
    """
    N = 10000
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    model = {a_i: a, b_i: b, c_i: c}
    fit_params = (a, b, c)

    np.random.seed(1)
    # Sample from a multivariate normal with correlation.
    pcov = np.array([[0.4, 0.3, 0.5], [0.3, 0.8, 0.4], [0.5, 0.4, 1.2]])
    xdata = np.random.multivariate_normal([10, 100, 70], pcov, N).T
    data = dict(a_i=xdata[0], b_i=xdata[1], c_i=xdata[2])
    # Analytical standard deviation of the mean, per component.
    analytical_stdev = [np.sqrt(pcov[i, i] / N) for i in range(3)]

    fit = ConstrainedNumericalLeastSquares(model=model, **data)
    fit_std = NumericalLeastSquares(model=model, **data)
    fit_new_result = fit.execute(tol=1e-9)
    std_result = fit_std.execute()

    # When no errors are given, we default to `absolute_sigma=False`, since
    # that is the best we can do.
    self.assertFalse(fit.absolute_sigma)

    # The standard method and the Constrained object called without
    # constraints should give roughly the same parameter values.
    for param in fit_params:
        self.assertAlmostEqual(fit_new_result.value(param),
                               std_result.value(param), 3)

    # In this toy model, fitting is identical to simply taking the average.
    for param, sample in zip(fit_params, xdata):
        self.assertAlmostEqual(fit_new_result.value(param),
                               np.mean(sample), 4)

    # Since no sigma were provided, absolute_sigma=False. Therefore the
    # standard deviation doesn't match the expected value, but it does match
    # the empirical value.
    for param, sample in zip(fit_params, xdata):
        empirical = np.std(sample, ddof=1) / np.sqrt(N)
        self.assertAlmostEqual(fit_new_result.stdev(param) / empirical,
                               1.0, 3)

    # Test for a miss on the exact value.
    for param, expected in zip(fit_params, analytical_stdev):
        self.assertNotAlmostEqual(fit_new_result.stdev(param) / expected,
                                  1.0, 3)

    # The standard object actually does not predict the right values for
    # stdev, because its method for computing them apparently does not allow
    # for vector valued functions.
    # So actually, for vector valued functions it's better to use
    # ConstrainedNumericalLeastSquares, though this does not give
    # covariances.

    # With the correct values of sigma, absolute_sigma=True should be in
    # agreement with the analytical result.
    sigmadata = np.sqrt(np.diag(pcov))
    fit = ConstrainedNumericalLeastSquares(
        model=model,
        sigma_a_i=sigmadata[0],
        sigma_b_i=sigmadata[1],
        sigma_c_i=sigmadata[2],
        **data
    )
    self.assertTrue(fit.absolute_sigma)
    fit_result = fit.execute(tol=1e-9)
    # The standard deviation in the mean is stdev/sqrt(N),
    # see test_param_error_analytical
    for param, expected in zip(fit_params, analytical_stdev):
        self.assertAlmostEqual(fit_result.stdev(param) / expected, 1.0, 4)

    # Finally, we should confirm that with unrealistic sigma and
    # absolute_sigma=True, we are no longer in agreement with the analytical
    # result. Let's take everything to be 1 to point out the dangers of doing
    # so.
    sigmadata = np.array([1, 1, 1])
    fit2 = ConstrainedNumericalLeastSquares(
        model=model,
        sigma_a_i=sigmadata[0],
        sigma_b_i=sigmadata[1],
        sigma_c_i=sigmadata[2],
        absolute_sigma=True,
        **data
    )
    fit_result = fit2.execute(tol=1e-9)
    # Should be off bigly
    for param, expected in zip(fit_params, analytical_stdev):
        self.assertNotAlmostEqual(fit_result.stdev(param) / expected, 1.0, 1)
def test_CallableNumericalModel():
    """
    Compare `CallableNumericalModel` against the equivalent symbolic
    `CallableModel`: signatures, evaluation, rejection of faulty
    definitions, mixed symbolic/numerical components and fitting.
    """
    x, y, z = variables('x, y, z')
    a, b = parameters('a, b')
    true_values = dict(a=5.5, b=15.0)

    def line(x, a, b):
        return a * x + b

    def power(x, a):
        return x ** a

    model = CallableModel({y: a * x + b})
    numerical_model = CallableNumericalModel(
        {y: line}, connectivity_mapping={y: {x, a, b}}
    )
    assert model.__signature__ == numerical_model.__signature__

    xdata = np.linspace(0, 10)
    ydata = model(x=xdata, **true_values).y + np.random.normal(0, 1)
    symbolic_answer = np.array(model(x=xdata, **true_values))
    numerical_answer = np.array(numerical_model(x=xdata, **true_values))
    assert numerical_answer == pytest.approx(symbolic_answer)

    # Faulty model: x is missing from the connectivity mapping.
    faulty_model = CallableNumericalModel(
        {y: line}, connectivity_mapping={y: {a, b}}
    )
    assert not model.__signature__ == faulty_model.__signature__
    with pytest.raises(TypeError):
        # This is an incorrect signature, even though the callable itself is
        # correct. Should fail.
        faulty_model(xdata, 5.5, 15.0)

    # Faulty model whose components do not all accept all of the args
    with pytest.warns(DeprecationWarning):
        faulty_model = CallableNumericalModel(
            {y: line, z: power}, [x], [a, b]
        )
    assert model.__signature__ == faulty_model.__signature__
    with pytest.raises(TypeError):
        # Callable got an unexpected keyword 'b'
        faulty_model(xdata, 5.5, 15.0)

    # Faulty model with a wrongly named argument
    faulty_model = CallableNumericalModel(
        {y: lambda x, a, c=5: a * x + c},
        connectivity_mapping={y: {x, a, b}}
    )
    assert model.__signature__ == faulty_model.__signature__
    with pytest.raises(TypeError):
        # Lambda got an unexpected keyword 'b'
        faulty_model(xdata, 5.5, 15.0)

    # Correct version of the previous model
    numerical_model = CallableNumericalModel(
        {y: line, z: power},
        connectivity_mapping={y: {a, b, x}, z: {x, a}}
    )
    # The same model again, but with z given as a symbolic expression.
    mixed_model = CallableNumericalModel(
        {y: line, z: x ** a},
        connectivity_mapping={y: {x, a, b}}
    )

    numerical_answer = np.array(numerical_model(x=xdata, **true_values))
    mixed_answer = np.array(mixed_model(x=xdata, **true_values))
    assert numerical_answer == pytest.approx(mixed_answer)

    zdata = mixed_model(x=xdata, **true_values).z + np.random.normal(0, 1)

    # Check if the fits are the same
    mixed_result = Fit(mixed_model, x=xdata, y=ydata, z=zdata).execute()
    numerical_result = Fit(numerical_model, x=xdata, y=ydata,
                           z=zdata).execute()
    for param in [a, b]:
        assert mixed_result.value(param) == pytest.approx(
            numerical_result.value(param))
        mixed_stdev = mixed_result.stdev(param)
        numerical_stdev = numerical_result.stdev(param)
        if mixed_stdev is None or numerical_stdev is None:
            # If one is missing, both have to be missing.
            assert mixed_result.stdev(param) is None and numerical_result.stdev(param) is None
        else:
            assert mixed_result.stdev(param) == pytest.approx(
                numerical_result.stdev(param))
    assert mixed_result.r_squared == pytest.approx(
        numerical_result.r_squared)

    # Test if the constrained syntax is supported
    fit = Fit(numerical_model, x=xdata, y=ydata, z=zdata,
              constraints=[Eq(a, b)])
    constrained_result = fit.execute()
    assert constrained_result.value(a) == pytest.approx(
        constrained_result.value(b))
# Create a new column with the number of days since first infection (the x-axis) country_first_dates = { c: sorted_data[sorted_data["country"] == c].index.min() for c in countries } sorted_data["100_cases"] = sorted_data.apply( lambda x: country_first_dates[x.country], axis=1) sorted_data["days_since_100_cases"] = ( sorted_data.index - sorted_data["100_cases"]).apply(lambda x: x.days) logger.info("Training...") fit_result = {} ode_model = {} for country in countries: t, S, I, R, D = sf.variables("t, S, I, R, D") p_susceptible = 0.00085 N_mu = populations[country] * p_susceptible β_0, γ_0, μ_0 = 0.35, 0.1, 0.03 N_0 = N_mu β = sf.Parameter("β", value=β_0, min=0.1, max=0.5) γ = sf.Parameter("γ", value=γ_0, min=0.01, max=0.2) N = sf.Parameter("N", value=N_0, min=1e4, max=1e7) μ = sf.Parameter("μ", value=μ_0, min=0.0001, max=0.1) print(country, N_0) model_dict = { sf.D(S, t): -β * I * S / N, sf.D(I, t): β * I * S / N - γ * I - μ * I, sf.D(R, t): γ * I,
def test_interdependency():
    """
    Test models in which one component (z) depends on another (y): variable
    classification, connectivity mapping, and the derived Jacobian and
    Hessian models.
    """
    a, b = parameters('a, b')
    x, y, z = variables('x, y, z')
    model_dict = {
        y: a**3 * x + b**2,
        z: y**2 + a * b
    }

    def assert_function_args_match_connectivity(checked_model):
        # Each component, written as a function, should take exactly the
        # symbols listed for it in the connectivity mapping. Function
        # arguments are compared by class, all other symbols by name.
        for var, func in checked_model.vars_as_functions.items():
            connected_names = {
                symbol.name
                for symbol in checked_model.connectivity_mapping[var]
            }
            arg_names = {
                str(arg.__class__) if isinstance(arg, Function) else arg.name
                for arg in func.args
            }
            assert connected_names == arg_names

    callable_model = CallableModel(model_dict)
    assert callable_model.independent_vars == [x]
    assert callable_model.interdependent_vars == [y]
    assert callable_model.dependent_vars == [z]
    assert callable_model.params == [a, b]
    assert callable_model.connectivity_mapping == {y: {a, b, x},
                                                   z: {a, b, y}}
    assert callable_model(x=3, a=1, b=2) == pytest.approx(
        np.atleast_2d([7, 51]).T)
    assert_function_args_match_connectivity(callable_model)

    jac_model = jacobian_from_model(callable_model)
    assert jac_model.params == [a, b]
    assert jac_model.dependent_vars == [D(z, a), D(z, b), z]
    assert jac_model.interdependent_vars == [D(y, a), D(y, b), y]
    assert jac_model.independent_vars == [x]
    for sig_param, symbol in zip_longest(jac_model.__signature__.parameters,
                                         [x, a, b]):
        assert str(sig_param) == str(symbol)

    # The connectivity of jac_model should be that from its own components
    # plus that of the model. The latter is needed to properly compute the
    # Hessian.
    jac_con_map = {
        D(y, a): {a, x},
        D(y, b): {b},
        D(z, a): {b, y, D(y, a)},
        D(z, b): {a, y, D(y, b)},
        y: {a, b, x},
        z: {a, b, y},
    }
    assert jac_model.connectivity_mapping == jac_con_map

    jac_model_dict = {
        D(y, a): 3 * a**2 * x,
        D(y, b): 2 * b,
        D(z, a): b + 2 * y * D(y, a),
        D(z, b): a + 2 * y * D(y, b),
        y: callable_model[y],
        z: callable_model[z],
    }
    assert jac_model.model_dict == jac_model_dict
    assert_function_args_match_connectivity(jac_model)

    hess_model = hessian_from_model(callable_model)
    # Result according to Mathematica
    hess_as_dict = {
        D(y, (a, 2)): 6 * a * x,
        D(y, a, b): 0,
        D(y, b, a): 0,
        D(y, (b, 2)): 2,
        D(z, (a, 2)): 2 * D(y, a)**2 + 2 * y * D(y, (a, 2)),
        D(z, a, b): 1 + 2 * D(y, b) * D(y, a) + 2 * y * D(y, a, b),
        D(z, b, a): 1 + 2 * D(y, b) * D(y, a) + 2 * y * D(y, a, b),
        D(z, (b, 2)): 2 * D(y, b)**2 + 2 * y * D(y, (b, 2)),
        D(y, a): 3 * a ** 2 * x,
        D(y, b): 2 * b,
        D(z, a): b + 2 * y * D(y, a),
        D(z, b): a + 2 * y * D(y, b),
        y: callable_model[y],
        z: callable_model[z]
    }
    assert dict(hess_model) == hess_as_dict

    assert hess_model.params == [a, b]
    assert hess_model.dependent_vars == [
        D(z, (a, 2)), D(z, a, b), D(z, (b, 2)), D(z, b, a),
        D(z, a), D(z, b), z
    ]
    assert hess_model.interdependent_vars == [
        D(y, (a, 2)), D(y, a), D(y, b), y
    ]
    assert hess_model.independent_vars == [x]

    # A full `Model` should evaluate to the same numbers and share its
    # signature with its Jacobian and Hessian models.
    model = Model(model_dict)
    point = dict(x=3, a=1, b=2)
    assert model(**point) == pytest.approx(np.atleast_2d([7, 51]).T)
    assert model.eval_jacobian(**point) == pytest.approx(
        np.array([[[9], [4]], [[128], [57]]]))
    assert model.eval_hessian(**point) == pytest.approx(
        np.array([[[[18], [0]], [[0], [2]]],
                  [[[414], [73]], [[73], [60]]]]))

    assert model.__signature__ == model.jacobian_model.__signature__
    assert model.__signature__ == model.hessian_model.__signature__
def fourier_model(self, com):
    """
    Build the model dictionary mapping the variable `y` to
    `self.fourier_series(x, f=w, n=com)`, print it and return it.

    :param com: forwarded to `self.fourier_series` as `n`
        (presumably the number of series components; verify against that
        method).
    :return: dict with `y` as its single key.
    """
    x, y = variables('x, y')
    (w,) = parameters('w')
    series = self.fourier_series(x, f=w, n=com)
    model_dict = {y: series}
    print(model_dict)
    return model_dict
def test_vector_constrained_fitting(self):
    """
    Tests `ConstrainedNumericalLeastSquares` with vector models.

    The classical example of fitting measurements of the angles of a
    triangle is taken. In this case we know they should add up to 180
    degrees, so this can be added as a constraint. Additionally, not all
    three angles have to be provided with measurement data, since the
    constraint means the angles are not independent.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    model = {a_i: a, b_i: b, c_i: c}
    fit_params = (a, b, c)

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])
    full_data = dict(a_i=xdata[0], b_i=xdata[1], c_i=xdata[2])
    # Same data, but without any measurements for the third angle.
    partial_data = dict(a_i=xdata[0], b_i=xdata[1], c_i=None)

    def total(result):
        # Sum of the three fitted angles.
        return result.value(a) + result.value(b) + result.value(c)

    fit_none = ConstrainedNumericalLeastSquares(model=model, **partial_data)
    fit = ConstrainedNumericalLeastSquares(model=model, **full_data)
    fit_std = NumericalLeastSquares(model=model, **full_data)
    fit_constrained = ConstrainedNumericalLeastSquares(
        model=model,
        constraints=[Equality(a + b + c, 180)],
        **full_data
    )
    fit_none_result = fit_none.execute()
    fit_new_result = fit.execute()
    std_result = fit_std.execute()
    constr_result = fit_constrained.execute()

    # The total of averages should equal the total of the params by
    # definition.
    mean_total = np.mean(np.sum(xdata, axis=0))
    self.assertAlmostEqual(mean_total, total(std_result), 4)

    # The total after constraining to 180 should be exactly 180.
    self.assertAlmostEqual(180.0, total(constr_result), 4)

    # The standard method and the Constrained object called without
    # constraints should behave roughly the same.
    for param in fit_params:
        self.assertAlmostEqual(fit_new_result.value(param),
                               std_result.value(param), 4)

    # When fitting with a dataset set to None, for this example the value of
    # c should be unaffected.
    for param in (a, b):
        self.assertAlmostEqual(fit_none_result.value(param),
                               std_result.value(param), 4)
    self.assertAlmostEqual(fit_none_result.value(c), c.value)

    # Even without data for c, the constraint should still be satisfied.
    fit_none_constr = ConstrainedNumericalLeastSquares(
        model=model,
        constraints=[Equality(a + b + c, 180)],
        **partial_data
    )
    none_constr_result = fit_none_constr.execute()
    self.assertAlmostEqual(180.0, total(none_constr_result), 4)
def nonanalytical_func(x, a, b):
    """
    This can be any pythonic function which should be fitted, typically one
    which is not easily written or supported as an analytical expression.

    Here it is a piecewise function: constant `b` for `x <= b`, and the
    line `a * (x - b) + b` for `x > b`.

    :param x: array-like of x values.
    :param a: slope of the line for `x > b`.
    :param b: switch point, and the value of the function up to that point.
    :return: array with the same shape as `x`.
    """
    # Do your non-trivial magic here. In this case a Piecewise, although this
    # could also be done symbolically.
    x = np.asarray(x)
    above = x > b
    # Allocate the output in the dtype obtained by combining x, a and b.
    # Inheriting the dtype of x alone would silently truncate the result
    # whenever x holds integers and a or b are floats.
    y = np.zeros_like(x, dtype=np.result_type(x, a, b))
    y[above] = (a * (x - b) + b)[above]
    y[x <= b] = b
    return y


x, y1, y2 = variables('x, y1, y2')
a, b = parameters('a, b')

# y1 is computed by the python function above, y2 by a symbolic expression.
mixed_model = CallableNumericalModel(
    {y1: nonanalytical_func, y2: x**a},
    connectivity_mapping={y1: {x, a, b}}
)

# Generate data
xdata = np.linspace(0, 10)
y1data, y2data = mixed_model(x=xdata, a=1.3, b=4)
y1data = np.random.normal(y1data, 0.1 * y1data)
y2data = np.random.normal(y2data, 0.1 * y2data)

# Perform the fit
import numpy as np
from symfit import variables, parameters, Fit, exp, Model
from symfit.core.objectives import LogLikelihood


def exponential_pdf(value, scale):
    """Symbolic density of an exponential distribution with mean `scale`."""
    return (1 / scale) * exp(-value / scale)


# Draw two samples of different size from exponential distributions
np.random.seed(42)
data1 = np.random.exponential(5.5, 1000)
data2 = np.random.exponential(6, 2000)

# Define the model for an exponential distribution (numpy style)
a, b = parameters('a, b')
x1, y1, x2, y2 = variables('x1, y1, x2, y2')

# First let each sample have its own scale parameter
model = Model({y1: exponential_pdf(x1, a), y2: exponential_pdf(x2, b)})
print(model)

fit = Fit(model, x1=data1, x2=data2, objective=LogLikelihood)
fit_result = fit.execute()
print(fit_result)

# Instead, we could also fit with only one parameter to see which works best
model = Model({y1: exponential_pdf(x1, a), y2: exponential_pdf(x2, a)})

fit = Fit(model, x1=data1, x2=data2, objective=LogLikelihood)
fit_result = fit.execute()
print(fit_result)
def test_fixed_and_constrained_tc():
    """
    Taken from #165. Make sure the TrustConstr minimizer can deal with
    constraints and fixed parameters.
    """
    phi1, phi2, theta1, theta2 = parameters('phi1, phi2, theta1, theta2')
    x, y = variables('x, y')
    numerator = 1 + x * theta1 + theta2 * x**2
    denominator = 1 + phi1 * x * theta1 + phi2 * theta2 * x**2
    model_dict = {y: numerator / denominator}
    constraints = [GreaterThan(theta1, theta2)]

    xdata = np.array(
        [0., 0.000376, 0.000752, 0.0015, 0.00301, 0.00601, 0.00902])
    ydata = np.array([
        1., 1.07968041, 1.08990638, 1.12151629, 1.13068452, 1.15484109,
        1.19883952
    ])

    # Fix both phi parameters at known values.
    for param, fixed_value in ((phi1, 0.845251484373516),
                               (phi2, 0.7105427053026403)):
        param.value = fixed_value
        param.fixed = True

    fit = Fit(model_dict, x=xdata, y=ydata, constraints=constraints,
              minimizer=TrustConstr)
    fit_result_tc = fit.execute()

    # The data and fixed parameters should be partialed away. The
    # constraints only depend on the fixed parameters, the objective on the
    # independent data as well.
    constraint_kwargs = {
        phi2.name: phi2.value,
        phi1.name: phi1.value,
    }
    objective_kwargs = dict(constraint_kwargs)
    objective_kwargs[x.name] = xdata

    minimizer = fit.minimizer
    for index, constraint in enumerate(minimizer.constraints):
        assert isinstance(constraint, MinimizeModel)
        assert constraint.model == fit.constraints[index]
        assert constraint.data == fit.data
        assert constraint.data == fit.objective.data

        # Data should be the same memory location so they can share state.
        assert id(fit.objective.data) == id(constraint.data)

        # Test if the data and fixed params have been partialed away
        assert key2str(
            constraint._invariant_kwargs).keys() == constraint_kwargs.keys()
        assert key2str(
            fit.objective._invariant_kwargs).keys() == objective_kwargs.keys()

    # Compare the shapes. The constraint shape should now be the same as
    # that of the objective.
    obj_val = minimizer.objective(minimizer.initial_guesses)
    obj_jac = minimizer.wrapped_jacobian(minimizer.initial_guesses)
    with pytest.raises(TypeError):
        len(obj_val)  # scalars don't have lengths
    assert len(obj_jac) == 2

    for index, wrapped in enumerate(minimizer.wrapped_constraints):
        assert callable(wrapped.fun)
        assert callable(wrapped.jac)

        # The argument should be the partialed Constraint object
        assert wrapped.fun == minimizer.constraints[index]
        assert isinstance(wrapped.fun, MinimizeModel)

        # Test the shapes
        cons_val = wrapped.fun(minimizer.initial_guesses)
        cons_jac = wrapped.jac(minimizer.initial_guesses)
        assert cons_val.shape == (1, )
        assert isinstance(cons_val[0], float)
        assert obj_jac.shape == cons_jac.shape
        assert obj_jac.shape == (2, )
def test_vector_fitting():
    """
    Check which minimizer `Fit` ends up with, for a scalar model and for a
    vector model.

    For each model three situations are tried: no bounds or constraints
    (MINPACK, requested explicitly, for the scalar model; BFGS for the
    vector model), an equality constraint (SLSQP), and bounded parameters
    (LBFGSB). The bounded vector fit is also executed, and every fitted
    parameter has to match the mean of its data row.
    """
    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])
    # (min, max, initial value) applied to `a` and `b` respectively.
    bound_settings = ((0, 25, 10), (80, 120, 100))

    # --- Scalar model -----------------------------------------------------
    a, b = parameters('a, b')
    (a_i,) = variables('a_i')
    scalar_model = {a_i: a + b}

    unbounded = Fit(model=scalar_model, a_i=xdata[0], minimizer=MINPACK)
    assert isinstance(unbounded.minimizer, MINPACK)

    with_constraint = Fit(
        model=scalar_model,
        a_i=xdata[0],
        constraints=[Equality(a + b, 110)],
    )
    assert isinstance(with_constraint.minimizer, SLSQP)

    for param, (lower, upper, guess) in zip((a, b), bound_settings):
        param.min = lower
        param.max = upper
        param.value = guess
    with_bounds = Fit(model=scalar_model, a_i=xdata[0])
    assert isinstance(with_bounds.minimizer, LBFGSB)

    # --- Vector model: same three situations, on fresh symbols -------------
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    vector_model = {a_i: a, b_i: b, c_i: c}
    vector_data = {'a_i': xdata[0], 'b_i': xdata[1], 'c_i': xdata[2]}

    unbounded = Fit(model=vector_model, **vector_data)
    assert isinstance(unbounded.minimizer, BFGS)

    with_constraint = Fit(
        model=vector_model,
        constraints=[Equality(a + b + c, 180)],
        **vector_data
    )
    assert isinstance(with_constraint.minimizer, SLSQP)

    # Only `a` and `b` receive bounds; `c` is left as created.
    for param, (lower, upper, guess) in zip((a, b), bound_settings):
        param.min = lower
        param.max = upper
        param.value = guess
    with_bounds = Fit(model=vector_model, **vector_data)
    assert isinstance(with_bounds.minimizer, LBFGSB)

    # Each component is a constant, so the best fit is the mean of its row.
    fit_result = with_bounds.execute()
    for param, row in zip((a, b, c), xdata):
        assert fit_result.value(param) == pytest.approx(np.mean(row),
                                                        rel=1e-6)
# Fit z(x, y) with a model that is a polynomial-like expression in x times a
# linear factor in y, then run a non-negative least squares solve on the same
# z data for comparison.
# NOTE(review): `x_data`, `y_data`, `z_data` and the matrix `A` are not
# defined in this fragment; they presumably come from earlier code.
a0, a1, a2, a3 = parameters('a0, a1, a2, a3')
a4, a5, a6, a7 = parameters('a4, a5, a6, a7')

# Only the first four coefficients get a lower bound of zero; the bounds on
# a4 and a5 (`a4.min = 0.`, `a5.min = 0.`) were disabled in the original.
bounded_coefficients = (a0, a1, a2, a3)
for coefficient in bounded_coefficients:
    coefficient.min = 0.

# Initial guesses for the bounded coefficients.
initial_guesses = (
    0.3390921000e-1,
    0.5422763222e-2,
    0.5555555556e-4,
    4.166064907e-13,
)
for coefficient, guess in zip(bounded_coefficients, initial_guesses):
    coefficient.value = guess

x, y, z = variables('x, y, z')

# Alternative model forms that were previously tried (kept for reference):
#   z = a0 + a1 * x ** 0.5 + a2 * x + a3 * x ** 3 + a4 * y
#       + a5 * x ** 0.5 * y + a6 * x * y + a7 * x ** 3 * y
#   z = (a0 + a1 * x ** 0.5 + a2 * x + a3 * x ** 3.)*(1 + a4*y + a5*y**2)
x_dependence = a0 + a1 * x**0.5 + a2 * x + a3 * x**3.
model = {z: x_dependence * (1. + a4 * y)}

fit = Fit(model, x=x_data, y=y_data, z=z_data)
fit_result = fit.execute()

import scipy.optimize

# Non-negative least squares on the design matrix `A`.
# A previously tried alternative was scipy.linalg.lstsq(A, z_data).
C, res = scipy.optimize.nnls(A, z_data)
def test_constrained_dependent_on_model():
    """
    For a simple Gaussian distribution, we test if Models of various types
    can be used as constraints. Of particular interest are NumericalModels,
    which can be used to fix the integral of the model during the fit to 1,
    as it should be for a probability distribution.

    Three constraints are tried in turn: a `Model` built with
    `Model.as_constraint`, a plain `Eq` relation, and a
    `CallableNumericalModel` that integrates the model numerically. For each
    one the test checks how `Fit` converted it, and that the constrained fit
    satisfies the constraint.
    """
    A, mu, sig = parameters('A, mu, sig')
    x, y, Y = variables('x, y, Y')
    sig.min = 0.0

    model = GradientModel({y: A * Gaussian(x, mu=mu, sig=sig)})

    # Generate data, 1000 samples from a N(1.2, 2) distribution
    np.random.seed(2)
    xdata = np.random.normal(1.2, 2, 1000)
    ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))),
                                 density=True)
    # Bin centres; used as the x data for every fit in this test.
    xcentres = (xedges[1:] + xedges[:-1]) / 2

    # Unconstrained fit
    fit = Fit(model, x=xcentres, y=ydata)
    unconstr_result = fit.execute()

    # Constraints must be scalar models.
    with pytest.raises(ModelError):
        Model.as_constraint([A - 1, sig - 1], model, constraint_type=Eq)

    integral_constraint = Model.as_constraint(
        A * sqrt(2 * sympy.pi) * sig - 1, model, constraint_type=Eq)
    # Only when explicitly asked, do models behave as constraints.
    assert hasattr(integral_constraint, 'constraint_type')
    assert integral_constraint.constraint_type == Eq
    assert not hasattr(model, 'constraint_type')

    # Now lets make some valid constraints and see if they are respected!
    # FIXME These first two should be symbolical integrals over `y` instead,
    #  but currently this is not converted into a numpy/scipy function. So
    #  instead the first two are not valid constraints.
    constraint_model = Model.as_constraint(A - 1, model, constraint_type=Eq)
    constraint_exact = Eq(A, 1)
    constraint_num = CallableNumericalModel.as_constraint(
        {Y: lambda x, y: simps(y, x) - 1},  # Integrate using simps
        model=model,
        connectivity_mapping={Y: {x, y}},
        constraint_type=Eq)

    # Test for all these different types of constraint.
    for constraint in [constraint_model, constraint_exact, constraint_num]:
        if not isinstance(constraint, Eq):
            assert constraint.constraint_type == Eq

        fit = Fit(model, x=xcentres, y=ydata, constraints=[constraint])
        # Test if conversion into a constraint was done properly
        fit_constraint = fit.constraints[0]
        assert fit.model.params == fit_constraint.params
        assert fit_constraint.constraint_type == Eq

        con_map = fit_constraint.connectivity_mapping
        if isinstance(constraint, CallableNumericalModel):
            assert con_map == {Y: {x, y}, y: {x, mu, sig, A}}
            assert fit_constraint.independent_vars == [x]
            assert fit_constraint.dependent_vars == [Y]
            assert fit_constraint.interdependent_vars == [y]
            assert fit_constraint.params == [A, mu, sig]
        else:
            # TODO if these constraints can somehow be written as integrals
            #  depending on y and x this if/else should be removed.
            assert con_map == {fit_constraint.dependent_vars[0]: {A}}
            assert fit_constraint.independent_vars == []
            assert len(fit_constraint.dependent_vars) == 1
            assert fit_constraint.interdependent_vars == []
            assert fit_constraint.params == [A, mu, sig]

        # Finally, test if the constraint worked: evaluate the wrapped
        # constraint at the unconstrained and at the constrained optimum.
        fit_result = fit.execute(options={'eps': 1e-15, 'ftol': 1e-10})
        constraint_fun = fit.minimizer.wrapped_constraints[0]['fun']
        unconstr_value = constraint_fun(**unconstr_result.params)
        constr_value = constraint_fun(**fit_result.params)

        # TODO because of a bug by pytest we have to solve it like this
        assert constr_value[0] == pytest.approx(0, abs=1e-10)
        # And if it was very poorly met before
        # NOTE(review): the second positional argument of pytest.approx is
        # `rel`, which contributes nothing around an expected value of 0, so
        # this only requires the value to differ from 0 beyond the default
        # absolute tolerance. Confirm whether `abs=1e-1` was intended before
        # tightening it.
        assert not unconstr_value[0] == pytest.approx(0.0, 1e-1)
def test_fixed_and_constrained():
    """
    Taken from #165. Fixing parameters and constraining others caused a
    TypeError: missing a required argument: 'theta1', which was caused by a
    mismatch in the shape of the initial guesses given and the number of
    parameters constraints expected. The initial_guesses no longer contained
    those corresponding to fixed parameters.

    The fit is run with the SLSQP minimizer. Afterwards the test checks that
    every constraint held by the minimizer is a `MinimizeModel` sharing its
    data object with the objective, that the fixed parameters (and, for the
    objective, the independent data) ended up in `_invariant_kwargs`, and
    that the wrapped constraint dicts expose a callable ``'fun'`` and
    ``'jac'`` whose output shapes match those of the objective.
    """
    phi1, phi2, theta1, theta2 = parameters('phi1, phi2, theta1, theta2')
    x, y = variables('x, y')

    model_dict = {
        y: (1 + x * theta1 + theta2 * x**2)
        / (1 + phi1 * x * theta1 + phi2 * theta2 * x**2)
    }
    constraints = [GreaterThan(theta1, theta2)]

    xdata = np.array(
        [0., 0.000376, 0.000752, 0.0015, 0.00301, 0.00601, 0.00902])
    ydata = np.array([
        1., 1.07968041, 1.08990638, 1.12151629, 1.13068452, 1.15484109,
        1.19883952
    ])

    # Fix both phi parameters; only theta1 and theta2 remain to be fitted.
    phi1.value = 0.845251484373516
    phi1.fixed = True
    phi2.value = 0.7105427053026403
    phi2.fixed = True

    fit = Fit(model_dict, x=xdata, y=ydata, constraints=constraints,
              minimizer=SLSQP)
    # The result itself is not inspected; the fit only has to run without
    # raising.
    fit.execute()

    # The data and fixed parameters should be partialed away. Only the keys
    # of these dicts are compared below.
    objective_kwargs = {
        phi2.name: phi2.value,
        phi1.name: phi1.value,
        x.name: xdata,
    }
    constraint_kwargs = {
        phi2.name: phi2.value,
        phi1.name: phi1.value,
    }
    for index, constraint in enumerate(fit.minimizer.constraints):
        assert isinstance(constraint, MinimizeModel)
        assert constraint.model == fit.constraints[index]
        assert constraint.data == fit.data
        assert constraint.data == fit.objective.data

        # Data should be the same memory location so they can share state.
        # Compare identity with `is`: `id()` values are only unique among
        # objects that are alive at the same time, so comparing two `id()`
        # calls could pass by accident if either attribute returned a
        # temporary object.
        assert fit.objective.data is constraint.data

        # Test if the fixed params have been partialed away
        assert key2str(
            constraint._invariant_kwargs).keys() == constraint_kwargs.keys()
        assert key2str(
            fit.objective._invariant_kwargs).keys() == objective_kwargs.keys()

    # Compare the shapes. The constraint shape should now be the same as
    # that of the objective
    obj_val = fit.minimizer.objective(fit.minimizer.initial_guesses)
    obj_jac = fit.minimizer.wrapped_jacobian(fit.minimizer.initial_guesses)
    # scalars don't have lengths
    with pytest.raises(TypeError):
        len(obj_val)
    assert len(obj_jac) == 2

    for index, constraint in enumerate(fit.minimizer.wrapped_constraints):
        assert constraint['type'] == 'ineq'
        assert 'args' not in constraint
        # Check that the key exists before indexing it, so a missing
        # jacobian shows up as a failed assertion instead of a KeyError.
        assert 'jac' in constraint
        assert callable(constraint['fun'])
        assert callable(constraint['jac'])

        # The argument should be the partialed Constraint object
        assert constraint['fun'] == fit.minimizer.constraints[index]
        assert isinstance(constraint['fun'], MinimizeModel)

        # Test the shapes
        cons_val = constraint['fun'](fit.minimizer.initial_guesses)
        cons_jac = constraint['jac'](fit.minimizer.initial_guesses)
        assert cons_val.shape == (1, )
        assert isinstance(cons_val[0], float)
        assert obj_jac.shape == cons_jac.shape
        assert obj_jac.shape == (2, )
from symfit import parameters, variables, Fit, Piecewise, exp, Eq, Model
import numpy as np
import matplotlib.pyplot as plt

# Example: fit a two-piece model whose pieces are required to meet at the
# switchpoint x0.
x, y = variables('x, y')
a, b, x0 = parameters('a, b, x0')

# The two pieces: y1 is used where x <= x0, y2 where x > x0.
y1 = x**2 - a * x
y2 = a * x + b
piecewise_expr = Piecewise((y1, x <= x0), (y2, x > x0))
model = Model({y: piecewise_expr})

# Continuity constraint: both pieces, evaluated at x = x0 (by substituting
# x -> x0), have to be equal. This is expressed with `Eq`.
at_switchpoint = {x: x0}
constraints = [
    Eq(y1.subs(at_switchpoint), y2.subs(at_switchpoint)),
]

# Example data: evaluate the model for known parameter values, then add
# normally distributed noise (seeded so the example is reproducible).
xdata = np.linspace(-4, 4., 50)
noise_free = model(x=xdata, a=0.0, b=1.0, x0=1.0).y
np.random.seed(2)
ydata = np.random.normal(noise_free, 0.5)

# Help the fit by bounding the switchpoint between the models.
x0.min = 0.8
x0.max = 1.2

fit = Fit(model, x=xdata, y=ydata, constraints=constraints)
fit_result = fit.execute()
print(fit_result)
def test_global_fitting():
    """
    Check the minimizer `Fit` selects for three models: a vector model
    whose components share the parameter `y0`, a vector model without
    shared parameters, and a scalar model.

    In all three cases the minimizer is expected to be `BFGS`; the
    `shared_parameters` flag of each model is checked as well.
    """
    x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
    y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')

    # Two quadratics linked through the common offset y0.
    shared_model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
    })
    assert shared_model.shared_parameters

    # Generate data from this model; the second data set only has every
    # other x point.
    xdata1 = np.linspace(0, 10)
    xdata2 = xdata1[::2]
    true_values = {'a_1': 101.3, 'b_1': 0.5, 'a_2': 56.3, 'b_2': 1.1111,
                   'y0': 10.8}
    ydata1, ydata2 = shared_model(x_1=xdata1, x_2=xdata2, **true_values)

    # Add some noise to make it appear like real data.
    np.random.seed(1)
    ydata1 += np.random.normal(0, 2, size=ydata1.shape)
    ydata2 += np.random.normal(0, 2, size=ydata2.shape)

    # Initial guesses.
    for param, guess in ((a_1, 100), (a_2, 50), (b_1, 1), (b_2, 1),
                         (y0, 10)):
        param.value = guess

    both_sets = {'x_1': xdata1, 'x_2': xdata2, 'y_1': ydata1, 'y_2': ydata2}

    fit = Fit(shared_model, **both_sets)
    assert isinstance(fit.minimizer, BFGS)

    # The next model does not share parameters, but is still a vector.
    unshared_model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1,
        y_2: a_2 * x_2**2 + b_2 * x_2,
    })
    fit = Fit(unshared_model, **both_sets)
    assert not unshared_model.shared_parameters
    assert isinstance(fit.minimizer, BFGS)

    # Scalar model, still use bfgs.
    scalar_model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1,
    })
    fit = Fit(scalar_model, x_1=xdata1, y_1=ydata1)
    assert scalar_model.shared_parameters is False
    assert isinstance(fit.minimizer, BFGS)
def test_LogLikelihood():
    """
    Tests if the LogLikelihood objective gives the right shapes of output by
    comparing with its analytical equivalent.

    The analytical equivalent is a `Model` holding minus the summed log of
    the pdf, which is also wrapped in a `MinimizeModel`. Values, jacobians
    and hessians of the three are compared at ``a=2, b=3``, and finally the
    fits obtained with both objectives have to agree.
    """
    # TODO: update these tests to use indexed variables in the future
    a, b = parameters('a, b')
    i = Idx('i', 100)
    x, y = variables('x, y')
    pdf = Exp(x, 1 / a) * Exp(x, b)

    np.random.seed(10)
    xdata = np.random.exponential(3.5, 100)

    # We use minus loglikelihood for the model, because the objective was
    # designed to find the maximum when used with a *minimizer*, so it has
    # opposite sign. Also test MinimizeModel at the same time.
    logL_model = Model({y: pdf})
    logL_exact = Model({y: -FlattenSum(log(pdf), i)})
    logL_numerical = LogLikelihood(logL_model, {x: xdata, y: None})
    logL_minmodel = MinimizeModel(logL_exact, data={x: xdata, y: None})

    # Evaluate value, jacobian and hessian of all three at the same point.
    point = {'a': 2, 'b': 3}
    exact_val = logL_exact(x=xdata, **point)
    exact_jac = logL_exact.eval_jacobian(x=xdata, **point)
    exact_hess = logL_exact.eval_hessian(x=xdata, **point)
    minmodel_val = logL_minmodel(**point)
    minmodel_jac = logL_minmodel.eval_jacobian(**point)
    minmodel_hess = logL_minmodel.eval_hessian(**point)
    numerical_val = logL_numerical(**point)
    numerical_jac = logL_numerical.eval_jacobian(**point)
    numerical_hess = logL_numerical.eval_hessian(**point)

    # TODO: These shapes should not have the ones! This is due to the current
    #  convention that scalars should be returned as a 1d array by Model's.
    assert exact_val[0].shape == (1, )
    assert exact_jac[0].shape == (2, 1)
    assert exact_hess[0].shape == (2, 2, 1)

    # Test if identical to MinimizeModel
    assert exact_val[0] == pytest.approx(minmodel_val)
    assert exact_jac[0] == pytest.approx(minmodel_jac)
    assert exact_hess[0] == pytest.approx(minmodel_hess)

    # Test if these two models have the same call, jacobian, and hessian.
    # Since models always have components as their first dimension, we have
    # to slice that away.
    assert exact_val.y == pytest.approx(numerical_val)
    assert isinstance(numerical_val, float)
    assert isinstance(exact_val.y[0], float)
    assert np.squeeze(exact_jac[0], axis=-1) == pytest.approx(numerical_jac)
    assert isinstance(numerical_jac, np.ndarray)
    assert np.squeeze(exact_hess[0], axis=-1) == pytest.approx(numerical_hess)
    assert isinstance(numerical_hess, np.ndarray)

    # Fit with each objective in turn and compare the outcomes.
    exact_fit = Fit(logL_exact, x=xdata, objective=MinimizeModel)
    fit_exact_result = exact_fit.execute()
    numerical_fit = Fit(logL_model, x=xdata, objective=LogLikelihood)
    fit_num_result = numerical_fit.execute()

    for param in (a, b):
        assert fit_exact_result.value(param) == pytest.approx(
            fit_num_result.value(param))
    for param in (a, b):
        assert fit_exact_result.stdev(param) == pytest.approx(
            fit_num_result.stdev(param))