def test_known_solution(self):
    p, c1 = parameters('p, c1')
    y, t = variables('y, t')
    p.value = 3.0

    model_dict = {
        D(y, t): - p * y,
    }

    # Let's say we know the exact solution to this problem
    sol = Model({y: exp(- p * t)})

    # Generate some data
    tdata = np.linspace(0, 3, 10001)
    ydata = sol(t=tdata, p=3.22)[0]
    ydata += np.random.normal(0, 0.005, ydata.shape)

    ode_model = ODEModel(model_dict, initial={t: 0.0, y: ydata[0]})
    fit = Fit(ode_model, t=tdata, y=ydata)
    ode_result = fit.execute()

    c1.value = ydata[0]
    fit = Fit(sol, t=tdata, y=ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(ode_result.value(p) / fit_result.value(p), 1, 2)
    self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4)
    self.assertAlmostEqual(ode_result.stdev(p) / fit_result.stdev(p), 1, 3)
def test_full_eval_range(self):
    """
    Test if ODEModels can be evaluated at t < t_initial.

    A bit of a no news is good news test.
    """
    tdata = np.array([0, 10, 26, 44, 70, 120])
    adata = 10e-4 * np.array([54, 44, 34, 27, 20, 14])
    a, b, t = variables('a, b, t')
    k, a0 = parameters('k, a0')
    k.value = 0.01
    t0 = tdata[2]
    a0 = adata[2]
    b0 = 0.02729855  # Obtained from evaluating from t=0.

    model_dict = {
        D(a, t): - k * a**2,
        D(b, t): k * a**2,
    }

    ode_model = ODEModel(model_dict, initial={t: t0, a: a0, b: b0})

    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    ode_result = fit.execute()
    self.assertGreater(ode_result.r_squared, 0.95)

    # Now start from a timepoint that is not in the t-array such that it
    # triggers another pathway to be taken in integrating it.
    # Again, no news is good news.
    ode_model = ODEModel(model_dict, initial={t: t0 + 1e-5, a: a0, b: b0})

    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    ode_result = fit.execute()
    self.assertGreater(ode_result.r_squared, 0.95)
def test_likelihood_fitting_exponential(self):
    """
    Fit using the likelihood method.
    """
    b = Parameter(value=4, min=3.0)
    x, y = variables('x, y')
    pdf = {y: Exp(x, 1/b)}

    # Draw points from an Exp(5) exponential distribution.
    np.random.seed(100)
    xdata = np.random.exponential(5, 1000000)

    # Expected parameter values
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev / np.sqrt(len(xdata))

    with self.assertRaises(NotImplementedError):
        fit = Fit(pdf, x=xdata, sigma_y=2.0, objective=LogLikelihood)
    fit = Fit(pdf, xdata, objective=LogLikelihood)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(b) / mean, 1, 3)
    self.assertAlmostEqual(fit_result.value(b) / stdev, 1, 3)
    self.assertAlmostEqual(fit_result.stdev(b) / mean_stdev, 1, 3)
def test_gaussian_fitting(self):
    """
    Tests fitting to a gaussian function and fit_result.params unpacking.
    """
    xdata = 2*np.random.rand(10000) - 1  # random between [-1, 1]
    ydata = 5.0 * scipy.stats.norm.pdf(xdata, loc=0.0, scale=1.0)
    x0 = Parameter()
    sig = Parameter()
    A = Parameter()
    x = Variable()
    g = A * Gaussian(x, x0, sig)

    fit = Fit(g, xdata, ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(A), 5.0)
    self.assertAlmostEqual(np.abs(fit_result.value(sig)), 1.0)
    self.assertAlmostEqual(fit_result.value(x0), 0.0)

    sexy = g(x=2.0, **fit_result.params)
    ugly = g(
        x=2.0,
        x0=fit_result.value(x0),
        A=fit_result.value(A),
        sig=fit_result.value(sig),
    )
    self.assertEqual(sexy, ugly)
def test_likelihood_fitting_gaussian(self):
    """
    Fit using the likelihood method.
    """
    mu, sig = parameters('mu, sig')
    sig.min = 0.01
    sig.value = 3.0
    mu.value = 50.
    x = Variable()
    pdf = Gaussian(x, mu, sig)

    np.random.seed(10)
    xdata = np.random.normal(51., 3.5, 10000)

    # Expected parameter values
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev / np.sqrt(len(xdata))

    fit = Fit(pdf, xdata, objective=LogLikelihood)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(mu) / mean, 1, 6)
    self.assertAlmostEqual(fit_result.stdev(mu) / mean_stdev, 1, 3)
    self.assertAlmostEqual(fit_result.value(sig) / np.std(xdata), 1, 6)
def test_vector_fitting(self):
    """
    Tests fitting to a 3 component vector valued function, without bounds
    or guesses.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit = Fit(
        model=model,
        a_i=xdata[0],
        b_i=xdata[1],
        c_i=xdata[2],
        minimizer=MINPACK
    )
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a) / 9.985691, 1.0, 5)
    self.assertAlmostEqual(fit_result.value(b) / 1.006143e+02, 1.0, 4)
    self.assertAlmostEqual(fit_result.value(c) / 7.085713e+01, 1.0, 5)
def test_fitting(self):
    xdata = np.linspace(1, 10, 10)
    ydata = 3*xdata**2

    a = Parameter()  # 3.1, min=2.5, max=3.5
    b = Parameter()
    x = Variable()
    new = a*x**b

    fit = Fit(new, xdata, ydata, minimizer=MINPACK)
    fit_result = fit.execute()

    self.assertIsInstance(fit_result, FitResults)
    self.assertAlmostEqual(fit_result.value(a), 3.0)
    self.assertAlmostEqual(fit_result.value(b), 2.0)
    self.assertIsInstance(fit_result.stdev(a), float)
    self.assertIsInstance(fit_result.stdev(b), float)
    self.assertIsInstance(fit_result.r_squared, float)
    # By definition, since there is no fuzziness in the data.
    self.assertEqual(fit_result.r_squared, 1.0)

    # Test several illegal ways to access the data.
    self.assertRaises(AttributeError, getattr, fit_result.params, 'a_fdska')
    self.assertRaises(AttributeError, getattr, fit_result.params, 'c')
    self.assertRaises(AttributeError, getattr, fit_result.params, 'a_stdev_stdev')
    self.assertRaises(AttributeError, getattr, fit_result.params, 'a_stdev_')
    self.assertRaises(AttributeError, getattr, fit_result.params, 'a__stdev')
def test_fitting(self):
    """
    Tests fitting with NumericalLeastSquares. Makes sure that the resulting
    objects and values are of the right type, and that the fit_result does
    not have unexpected members.
    """
    xdata = np.linspace(1, 10, 10)
    ydata = 3*xdata**2

    a = Parameter()  # 3.1, min=2.5, max=3.5
    b = Parameter()
    x = Variable()
    new = a*x**b

    fit = Fit(new, xdata, ydata, minimizer=MINPACK)
    fit_result = fit.execute()

    self.assertIsInstance(fit_result, FitResults)
    self.assertAlmostEqual(fit_result.value(a), 3.0)
    self.assertAlmostEqual(fit_result.value(b), 2.0)
    self.assertIsInstance(fit_result.stdev(a), float)
    self.assertIsInstance(fit_result.stdev(b), float)
    self.assertIsInstance(fit_result.r_squared, float)
    # By definition, since there is no fuzziness in the data.
    self.assertEqual(fit_result.r_squared, 1.0)
def test_vector_fitting_bounds(self):
    """
    Tests fitting to a 3 component vector valued function, with bounds.
    """
    a, b, c = parameters('a, b, c')
    a.min = 0
    a.max = 25
    b.min = 0
    b.max = 500
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit = Fit(
        model=model,
        a_i=xdata[0],
        b_i=xdata[1],
        c_i=xdata[2],
    )
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a), np.mean(xdata[0]), 4)
    self.assertAlmostEqual(fit_result.value(b), np.mean(xdata[1]), 4)
    self.assertAlmostEqual(fit_result.value(c), np.mean(xdata[2]), 4)
def test_chained_min_signature(self):
    """
    Test the automatic generation of the signature for ChainedMinimizer.
    """
    minimizers = [
        BFGS, DifferentialEvolution, BFGS,
        DifferentialEvolution, BFGS
    ]

    fit = Fit(self.model, self.xx, self.yy, self.ydata,
              minimizer=minimizers)

    names = [
        'BFGS', 'DifferentialEvolution', 'BFGS_2',
        'DifferentialEvolution_2', 'BFGS_3'
    ]
    for name, param_name in zip(names, fit.minimizer.__signature__.parameters):
        self.assertEqual(name, param_name)
    # Check for equal lengths because zip is slippery that way
    self.assertEqual(len(names), len(fit.minimizer.__signature__.parameters))

    for param in fit.minimizer.__signature__.parameters.values():
        self.assertEqual(param.kind, inspect_sig.Parameter.KEYWORD_ONLY)

    # Make sure keywords end up at the right minimizer.
    with self.assertRaises(TypeError):
        # This is not a valid kwarg to DiffEvo, but it is to BFGS. Check if
        # we really go by name of the Minimizer, not by order.
        fit.execute(DifferentialEvolution={'return_all': False})
def test_minimize(self):
    """
    Tests maximizing a function with and without constraints, taken from
    the scipy `minimize` tutorial. Compare the symfit result with the
    scipy result.
    https://docs.scipy.org/doc/scipy-0.18.1/reference/tutorial/optimize.html#constrained-minimization-of-multivariate-scalar-functions-minimize
    """
    x = Parameter(value=-1.0)
    y = Parameter(value=1.0)
    # Use an unnamed Variable on purpose to test the auto-generation of names.
    model = Model(2 * x * y + 2 * x - x ** 2 - 2 * y ** 2)

    constraints = [
        Ge(y - 1, 0),     # y - 1 >= 0,
        Eq(x**3 - y, 0),  # x**3 - y == 0,
    ]

    def func(x, sign=1.0):
        """ Objective function """
        return sign*(2*x[0]*x[1] + 2*x[0] - x[0]**2 - 2*x[1]**2)

    def func_deriv(x, sign=1.0):
        """ Derivative of objective function """
        dfdx0 = sign*(-2*x[0] + 2*x[1] + 2)
        dfdx1 = sign*(2*x[0] - 4*x[1])
        return np.array([dfdx0, dfdx1])

    cons = (
        {'type': 'eq',
         'fun': lambda x: np.array([x[0]**3 - x[1]]),
         'jac': lambda x: np.array([3.0*(x[0]**2.0), -1.0])},
        {'type': 'ineq',
         'fun': lambda x: np.array([x[1] - 1]),
         'jac': lambda x: np.array([0.0, 1.0])}
    )

    # Unconstrained fit
    res = minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv,
                   method='BFGS', options={'disp': False})
    fit = Fit(model=-model)
    self.assertIsInstance(fit.objective, MinimizeModel)
    self.assertIsInstance(fit.minimizer, BFGS)

    fit_result = fit.execute()
    self.assertAlmostEqual(fit_result.value(x) / res.x[0], 1.0, 6)
    self.assertAlmostEqual(fit_result.value(y) / res.x[1], 1.0, 6)

    # Same test, but with constraints in place.
    res = minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv,
                   constraints=cons, method='SLSQP', options={'disp': False})

    from symfit.core.minimizers import SLSQP
    fit = Fit(-model, constraints=constraints)
    self.assertEqual(fit.constraints[0].constraint_type, Ge)
    self.assertEqual(fit.constraints[1].constraint_type, Eq)
    fit_result = fit.execute()
    self.assertAlmostEqual(fit_result.value(x), res.x[0], 6)
    self.assertAlmostEqual(fit_result.value(y), res.x[1], 6)
def test_interdependency_constrained(self):
    """
    Test a model with interdependent components, and with constraints which
    depend on the Model's output.
    This is done in the MatrixSymbol formalism, using a Tikhonov
    regularization as an example. In this, a matrix inverse has to be
    calculated and is used multiple times. Therefore we split that term off
    into a separate component, so the inverse only has to be computed once
    per model call.

    See https://arxiv.org/abs/1901.05348 for a more detailed background.
    """
    N = Symbol('N', integer=True)
    M = MatrixSymbol('M', N, N)
    W = MatrixSymbol('W', N, N)
    I = MatrixSymbol('I', N, N)
    y = MatrixSymbol('y', N, 1)
    c = MatrixSymbol('c', N, 1)
    a, = parameters('a')
    z, = variables('z')
    i = Idx('i')

    model_dict = {
        W: Inverse(I + M / a ** 2),
        c: - W * y,
        z: sqrt(c.T * c),
    }
    # Sympy currently does not support derivatives of matrix expressions,
    # so we use CallableModel instead of Model.
    model = CallableModel(model_dict)

    # Generate data
    iden = np.eye(2)
    M_mat = np.array([[2, 1], [3, 4]])
    y_vec = np.array([[3], [5]])
    eval_model = model(I=iden, M=M_mat, y=y_vec, a=0.1)
    # Calculate the answers 'manually' so I know it was done properly
    W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2)
    c_manual = - np.atleast_2d(W_manual.dot(y_vec))
    z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual)))

    self.assertEqual(y_vec.shape, (2, 1))
    self.assertEqual(M_mat.shape, (2, 2))
    self.assertEqual(iden.shape, (2, 2))
    self.assertEqual(W_manual.shape, (2, 2))
    self.assertEqual(c_manual.shape, (2, 1))
    self.assertEqual(z_manual.shape, (1, 1))

    np.testing.assert_almost_equal(W_manual, eval_model.W)
    np.testing.assert_almost_equal(c_manual, eval_model.c)
    np.testing.assert_almost_equal(z_manual, eval_model.z)

    fit = Fit(model, z=z_manual, I=iden, M=M_mat, y=y_vec)
    fit_result = fit.execute()

    # See if a == 0.1 was reconstructed properly. Since only a**2 features
    # in the equations, we check for the absolute value. Setting a.min = 0.0
    # is not appreciated by the Minimizer, it seems.
    self.assertAlmostEqual(np.abs(fit_result.value(a)), 0.1)
def test_2_gaussian_2d_fitting(self):
    """
    Tests fitting to a scalar gaussian with 2 independent variables with
    tight bounds.
    """
    mean = (0.3, 0.4)  # x, y mean 0.3, 0.4
    cov = [[0.01**2, 0], [0, 0.01**2]]
    data = np.random.multivariate_normal(mean, cov, 3000000)
    mean = (0.7, 0.8)  # x, y mean 0.7, 0.8
    cov = [[0.01**2, 0], [0, 0.01**2]]
    data_2 = np.random.multivariate_normal(mean, cov, 3000000)
    data = np.vstack((data, data_2))

    # Insert them as y,x here as np f***s up cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 1], data[:, 0], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False)

    x = Variable()
    y = Variable()

    x0_1 = Parameter(0.7, min=0.6, max=0.9)
    sig_x_1 = Parameter(0.1, min=0.0, max=0.2)
    y0_1 = Parameter(0.8, min=0.6, max=0.9)
    sig_y_1 = Parameter(0.1, min=0.0, max=0.2)
    A_1 = Parameter()
    g_1 = A_1 * Gaussian(x, x0_1, sig_x_1) * Gaussian(y, y0_1, sig_y_1)

    x0_2 = Parameter(0.3, min=0.2, max=0.5)
    sig_x_2 = Parameter(0.1, min=0.0, max=0.2)
    y0_2 = Parameter(0.4, min=0.2, max=0.5)
    sig_y_2 = Parameter(0.1, min=0.0, max=0.2)
    A_2 = Parameter()
    g_2 = A_2 * Gaussian(x, x0_2, sig_x_2) * Gaussian(y, y0_2, sig_y_2)

    model = g_1 + g_2
    fit = Fit(model, xx, yy, ydata)
    fit_result = fit.execute()

    self.assertIsInstance(fit.fit, ConstrainedNumericalLeastSquares)

    img = model(x=xx, y=yy, **fit_result.params)
    img_g_1 = g_1(x=xx, y=yy, **fit_result.params)
    img_g_2 = g_2(x=xx, y=yy, **fit_result.params)
    np.testing.assert_array_equal(img, img_g_1 + img_g_2)

    # Equal up to some precision. Not much obviously.
    self.assertAlmostEqual(fit_result.value(x0_1), 0.7, 3)
    self.assertAlmostEqual(fit_result.value(y0_1), 0.8, 3)
    self.assertAlmostEqual(fit_result.value(x0_2), 0.3, 3)
    self.assertAlmostEqual(fit_result.value(y0_2), 0.4, 3)
def test_global_fitting(self):
    """
    Test a global fitting scenario with datasets of unequal length. In this
    scenario, a quadratic equation is fitted where the constant term is
    shared between the datasets. (e.g. identical background noise)
    """
    x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
    y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')

    # The following vector valued function links all the equations together
    # as stated in the intro.
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
    })

    # Generate data from this model
    xdata1 = np.linspace(0, 10)
    xdata2 = xdata1[::2]  # Make the sets of unequal size
    ydata1, ydata2 = model(x_1=xdata1, x_2=xdata2, a_1=101.3, b_1=0.5,
                           a_2=56.3, b_2=1.1111, y0=10.8)
    # Add some noise to make it appear like real data
    np.random.seed(1)
    ydata1 += np.random.normal(0, 2, size=ydata1.shape)
    ydata2 += np.random.normal(0, 2, size=ydata2.shape)

    xdata = [xdata1, xdata2]
    ydata = [ydata1, ydata2]

    # Guesses
    a_1.value = 100
    a_2.value = 50
    b_1.value = 1
    b_2.value = 1
    y0.value = 10

    eval_jac = model.eval_jacobian(x_1=xdata1, x_2=xdata2, a_1=101.3,
                                   b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
    self.assertEqual(len(eval_jac), 2)
    for comp in eval_jac:
        self.assertEqual(len(comp), len(model.params))

    sigma_y = np.concatenate((np.ones(20), [2., 4., 5, 7, 3]))

    fit = Fit(model, x_1=xdata[0], x_2=xdata[1],
              y_1=ydata[0], y_2=ydata[1], sigma_y_2=sigma_y)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(y0), 1.061892e+01, 3)
    self.assertAlmostEqual(fit_result.value(a_1), 1.013269e+02, 3)
    self.assertAlmostEqual(fit_result.value(a_2), 5.625694e+01, 3)
    self.assertAlmostEqual(fit_result.value(b_1), 3.362240e-01, 3)
    self.assertAlmostEqual(fit_result.value(b_2), 1.565253e+00, 3)
def test_chained_min(self):
    """Test fitting with a chained minimizer"""
    curvals = [p.value for p in self.model.params]
    fit = Fit(self.model, self.xx, self.yy, self.ydata,
              minimizer=[DifferentialEvolution, BFGS])
    fit_result = fit.execute(
        DifferentialEvolution={'seed': 0, 'tol': 1e-4, 'maxiter': 10}
    )
    self.assertAlmostEqual(fit_result.value(self.x0_1), 0.4, 4)
    self.assertAlmostEqual(fit_result.value(self.y0_1), 0.4, 4)
    self.assertEqual(curvals, [p.value for p in self.model.params])
def test_MatrixSymbolModel(self):
    """
    Test a model which is defined by MatrixSymbols, see #194
    """
    N = Symbol('N', integer=True)
    M = MatrixSymbol('M', N, N)
    W = MatrixSymbol('W', N, N)
    I = MatrixSymbol('I', N, N)
    y = MatrixSymbol('y', N, 1)
    c = MatrixSymbol('c', N, 1)

    a, b = parameters('a, b')
    z, x = variables('z, x')

    model_dict = {
        W: Inverse(I + M / a ** 2),
        c: - W * y,
        z: sqrt(c.T * c),
    }
    # TODO: This should be a Model in the future, but sympy is not yet
    # capable of computing Matrix derivatives at the time of writing.
    model = CallableModel(model_dict)

    self.assertEqual(model.params, [a])
    self.assertEqual(model.independent_vars, [I, M, y])
    self.assertEqual(model.dependent_vars, [z])
    self.assertEqual(model.interdependent_vars, [W, c])
    self.assertEqual(model.connectivity_mapping,
                     {W: {I, M, a}, c: {W, y}, z: {c}})

    # Generate data
    iden = np.eye(2)
    M_mat = np.array([[2, 1], [3, 4]])
    y_vec = np.array([3, 5])

    eval_model = model(I=iden, M=M_mat, y=y_vec, a=0.1)
    W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2)
    c_manual = - W_manual.dot(y_vec)
    z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual)))
    np.testing.assert_allclose(eval_model.W, W_manual)
    np.testing.assert_allclose(eval_model.c, c_manual)
    np.testing.assert_allclose(eval_model.z, z_manual)

    # Now try to retrieve the value of `a` from a fit
    a.value = 0.2
    fit = Fit(model, z=z_manual, I=iden, M=M_mat, y=y_vec)
    fit_result = fit.execute()
    eval_model = model(I=iden, M=M_mat, y=y_vec, **fit_result.params)
    self.assertAlmostEqual(0.1, np.abs(fit_result.value(a)))
    np.testing.assert_allclose(eval_model.W, W_manual, rtol=1e-5)
    np.testing.assert_allclose(eval_model.c, c_manual, rtol=1e-5)
    np.testing.assert_allclose(eval_model.z, z_manual, rtol=1e-5)
def test_named_fitting(self):
    xdata = np.linspace(1, 10, 10)
    ydata = 3*xdata**2

    a = Parameter('a', 1.0)
    b = Parameter('b', 2.5)
    x, y = variables('x, y')
    model = {y: a*x**b}

    fit = Fit(model, x=xdata, y=ydata)
    fit_result = fit.execute()
    self.assertIsInstance(fit_result, FitResults)
    self.assertAlmostEqual(fit_result.value(a), 3.0, 3)
    self.assertAlmostEqual(fit_result.value(b), 2.0, 4)
def test_pickle(self):
    xdata = np.linspace(1, 10, 10)
    ydata = 3 * xdata ** 2

    a = Parameter('a')  # 3.1, min=2.5, max=3.5
    b = Parameter('b')
    x = Variable('x')
    y = Variable('y')
    new = {y: a * x ** b}

    fit = Fit(new, x=xdata, y=ydata)
    fit_result = fit.execute()
    new_result = pickle.loads(pickle.dumps(fit_result))
    self.assertEqual(fit_result.__dict__.keys(), new_result.__dict__.keys())
def test_gaussian_2d_fitting(self):
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables. Very sensitive to initial guesses: if they are chosen too
    restrictively, Fit actually throws a tantrum. It therefore appears to
    be more sensitive than NumericalLeastSquares.
    """
    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
    cov = [[0.2**2, 0], [0, 0.1**2]]

    np.random.seed(0)
    data = np.random.multivariate_normal(mean, cov, 100000)

    # Insert them as y,x here as np f***s up cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter(value=mean[0], min=0.0, max=1.0)
    sig_x = Parameter(value=0.2, min=0.0, max=0.3)
    y0 = Parameter(value=mean[1], min=0.0, max=1.0)
    sig_y = Parameter(value=0.1, min=0.0, max=0.3)
    A = Parameter(value=np.mean(ydata), min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')

    model = Model({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(x0), np.mean(data[:, 0]), 3)
    self.assertAlmostEqual(fit_result.value(y0), np.mean(data[:, 1]), 3)
    self.assertAlmostEqual(np.abs(fit_result.value(sig_x)), np.std(data[:, 0]), 2)
    self.assertAlmostEqual(np.abs(fit_result.value(sig_y)), np.std(data[:, 1]), 2)
    self.assertGreaterEqual(fit_result.r_squared, 0.96)

    # Compare with industry standard MINPACK
    fit_std = Fit(model, x=xx, y=yy, g=ydata, minimizer=MINPACK)
    fit_std_result = fit_std.execute()

    self.assertAlmostEqual(fit_std_result.value(x0), fit_result.value(x0), 4)
    self.assertAlmostEqual(fit_std_result.value(y0), fit_result.value(y0), 4)
    self.assertAlmostEqual(fit_std_result.value(sig_x), fit_result.value(sig_x), 4)
    self.assertAlmostEqual(fit_std_result.value(sig_y), fit_result.value(sig_y), 4)
    self.assertAlmostEqual(fit_std_result.r_squared, fit_result.r_squared, 4)
def test_powell(self):
    """
    Powell with a single parameter gave an error because a 0-d array was
    returned by scipy. So no error here is winning.
    """
    x, y = variables('x, y')
    a, b = parameters('a, b')
    b.fixed = True

    model = Model({y: a * x + b})
    xdata = np.linspace(0, 10)
    ydata = model(x=xdata, a=5.5, b=15.0).y + np.random.normal(0, 1)
    fit = Fit({y: a * x + b}, x=xdata, y=ydata, minimizer=Powell)
    fit_result = fit.execute()
    self.assertAlmostEqual(fit_result.value(b), 1.0)
def test_fixed_parameters(self):
    """
    Make sure fixed parameters don't change on fitting
    """
    xdata = np.arange(100)
    ydata = np.arange(100)

    a, b, c, d = parameters('a, b, c, d')
    x, y = variables('x, y')

    c.value = 4.0
    c.fixed = True

    model_dict = {y: a * exp(-(x - b)**2 / (2 * c**2)) + d}
    fit = Fit(model_dict, x=xdata, y=ydata)
    fit_result = fit.execute()
    self.assertEqual(4.0, fit_result.params['c'])
def test_constraint_types(self):
    x = Parameter(value=-1.0)
    y = Parameter(value=1.0)
    z = Variable()
    model = Model({z: 2*x*y + 2*x - x**2 - 2*y**2})

    # These types are not allowed constraints.
    for relation in [Lt, Gt, Ne]:
        with self.assertRaises(ModelError):
            Fit(model, constraints=[relation(x, y)])

    # Should execute without problems.
    for relation in [Eq, Ge, Le]:
        Fit(model, constraints=[relation(x, y)])

    fit = Fit(model, constraints=[Le(x, y)])
    # Le should be transformed to Ge
    self.assertIs(fit.constraints[0].constraint_type, Ge)

    # Redo the standard test as a Le
    constraints = [
        Le(- y + 1, 0),   # y - 1 >= 0,
        Eq(x**3 - y, 0),  # x**3 - y == 0,
    ]
    std_constraints = [
        Ge(y - 1, 0),     # y - 1 >= 0,
        Eq(x**3 - y, 0),  # x**3 - y == 0,
    ]

    fit = Fit(-model, constraints=constraints)
    std_fit = Fit(-model, constraints=std_constraints)
    self.assertEqual(fit.constraints[0].constraint_type, Ge)
    self.assertEqual(fit.constraints[1].constraint_type, Eq)
    self.assertEqual(fit.constraints[0].params, [x, y])
    self.assertEqual(fit.constraints[1].params, [x, y])
    self.assertEqual(fit.constraints[0].jacobian_model.params, [x, y])
    self.assertEqual(fit.constraints[1].jacobian_model.params, [x, y])
    self.assertEqual(fit.constraints[0].hessian_model.params, [x, y])
    self.assertEqual(fit.constraints[1].hessian_model.params, [x, y])
    self.assertEqual(fit.constraints[0].__signature__,
                     fit.constraints[1].__signature__)
    fit_result = fit.execute()
    std_result = std_fit.execute()
    self.assertAlmostEqual(fit_result.value(x), std_result.value(x))
    self.assertAlmostEqual(fit_result.value(y), std_result.value(y))
def test_constrainedminimizers(self):
    """
    Compare the different constrained minimizers, to make sure all support
    constraints, and converge to the same answer.
    """
    minimizers = list(subclasses(ScipyConstrainedMinimize))
    x = Parameter('x', value=-1.0)
    y = Parameter('y', value=1.0)
    z = Variable('z')
    model = Model({z: 2 * x * y + 2 * x - x ** 2 - 2 * y ** 2})

    # First we try an unconstrained fit
    results = []
    for minimizer in minimizers:
        fit = Fit(- model, minimizer=minimizer)
        fit_result = fit.execute(tol=1e-15)
        results.append(fit_result)

    # Compare the parameter values.
    for r1, r2 in zip(results[:-1], results[1:]):
        self.assertAlmostEqual(r1.value(x), r2.value(x), 6)
        self.assertAlmostEqual(r1.value(y), r2.value(y), 6)
        np.testing.assert_almost_equal(r1.covariance_matrix,
                                       r2.covariance_matrix)

    constraints = [
        Ge(y - 1, 0),       # y - 1 >= 0,
        Eq(x ** 3 - y, 0),  # x**3 - y == 0,
    ]

    # Constrained fit.
    results = []
    for minimizer in minimizers:
        if minimizer is COBYLA:
            # COBYLA only supports inequality constraints.
            continue
        fit = Fit(- model, constraints=constraints, minimizer=minimizer)
        fit_result = fit.execute(tol=1e-15)
        results.append(fit_result)

    for r1, r2 in zip(results[:-1], results[1:]):
        self.assertAlmostEqual(r1.value(x), r2.value(x), 6)
        self.assertAlmostEqual(r1.value(y), r2.value(y), 6)
        np.testing.assert_almost_equal(r1.covariance_matrix,
                                       r2.covariance_matrix)
def test_jac_hess(self):
    """
    Make sure both the Jacobian and Hessian are passed to the minimizer.
    """
    x, y = variables('x, y')
    a, b = parameters('a, b')
    b.fixed = True

    model = Model({y: a * x + b})
    xdata = np.linspace(0, 10)
    ydata = model(x=xdata, a=5.5, b=15.0).y + np.random.normal(0, 1)
    fit = Fit({y: a * x + b}, x=xdata, y=ydata, minimizer=TrustConstr)
    self.assertIsInstance(fit.minimizer.objective, LeastSquares)
    self.assertIsInstance(fit.minimizer.jacobian.__self__, LeastSquares)
    self.assertIsInstance(fit.minimizer.hessian.__self__, LeastSquares)

    fit_result = fit.execute()
    self.assertAlmostEqual(fit_result.value(b), 1.0)
def test_mexican_hat(self):
    """
    Test that global minimisation finds the global minimum, and doesn't
    affect the value of parameters.
    """
    x = Parameter('x')
    x.min, x.max = -100, 100
    x.value = -2.5
    y = Variable('y')

    model = Model({y: x**4 - 10 * x**2 - x})  # Skewed Mexican hat
    fit = Fit(model, minimizer=[DifferentialEvolution, BFGS])
    fit_result1 = fit.execute(DifferentialEvolution={'seed': 0})

    fit = Fit(model)
    fit_result2 = fit.execute()

    self.assertGreater(fit_result1.value(x), 0)
    self.assertLess(fit_result2.value(x), 0)
def test_non_boundaries(self):
    """
    Make sure parameter boundaries are not invented
    """
    x = Parameter('x')
    y = Variable('y')
    model = Model({y: x**2})

    bounded_minimizers = list(subclasses(BoundedMinimizer))
    bounded_minimizers = [minimizer for minimizer in bounded_minimizers
                          if minimizer is not DifferentialEvolution]
    for minimizer in bounded_minimizers:
        fit = Fit(model, minimizer=minimizer)
        if minimizer is MINPACK:
            # Not a MINPACK-able problem: it has only a parameter.
            pass
        else:
            fit_result = fit.execute()
            self.assertAlmostEqual(fit_result.value(x), 0.0)
        self.assertEqual(fit.minimizer.bounds, [(None, None)])
def test_grid_fitting_sparse(self):
    xdata = np.arange(-5, 5, 1)
    ydata = np.arange(5, 15, 1)
    xx, yy = np.meshgrid(xdata, ydata, sparse=True)
    zdata = 2.5*xx**2 + 3.0*yy**2

    a = Parameter(value=2.4, max=2.75)
    b = Parameter(value=3.1, min=2.75)
    x = Variable('x')
    y = Variable('y')
    z = Variable('z')
    new = {z: a*x**2 + b*y**2}

    fit = Fit(new, x=xx, y=yy, z=zdata)
    results = fit.execute()

    self.assertAlmostEqual(results.value(a), 2.5, 4)
    self.assertAlmostEqual(results.value(b), 3.0, 4)
def test_gaussian_2d_fitting_background(self):
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables to data with a background. Added after #149.
    """
    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
    cov = [[0.2**2, 0], [0, 0.1**2]]
    background = 3.0

    data = np.random.multivariate_normal(mean, cov, 500000)
    # Insert them as y,x here as np f***s up cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2
    ydata += background  # Background

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter(value=1.1 * mean[0], min=0.0, max=1.0)
    sig_x = Parameter(value=1.1 * 0.2, min=0.0, max=0.3)
    y0 = Parameter(value=1.1 * mean[1], min=0.0, max=1.0)
    sig_y = Parameter(value=1.1 * 0.1, min=0.0, max=0.3)
    A = Parameter(value=1.1 * np.mean(ydata), min=0.0)
    b = Parameter(value=1.2 * background, min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')

    model = Model({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y) + b})
    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(x0) / np.mean(data[:, 0]), 1.0, 2)
    self.assertAlmostEqual(fit_result.value(y0) / np.mean(data[:, 1]), 1.0, 2)
    self.assertAlmostEqual(np.abs(fit_result.value(sig_x)) / np.std(data[:, 0]), 1.0, 2)
    self.assertAlmostEqual(np.abs(fit_result.value(sig_y)) / np.std(data[:, 1]), 1.0, 2)
    self.assertAlmostEqual(background / fit_result.value(b), 1.0, 1)
    self.assertGreaterEqual(fit_result.r_squared / 0.96, 1.0)
def test_named_fitting(self):
    """
    Make sure that fitting with NumericalLeastSquares works using a dict
    as model and that the resulting fit_result is of the right type.
    """
    xdata = np.linspace(1, 10, 10)
    ydata = 3*xdata**2

    a = Parameter(value=1.0)
    b = Parameter(value=2.5)
    x, y = variables('x, y')

    model = {y: a*x**b}

    fit = Fit(model, x=xdata, y=ydata, minimizer=MINPACK)
    fit_result = fit.execute()
    self.assertIsInstance(fit_result, FitResults)
    self.assertAlmostEqual(fit_result.value(a), 3.0)
    self.assertAlmostEqual(fit_result.value(b), 2.0)
def test_error_analytical(self):
    """
    Test using a case where the analytical answer is known. Uses both
    symfit and scipy's curve_fit.
    Modeled after:
    http://nbviewer.ipython.org/urls/gist.github.com/taldcroft/5014170/raw/31e29e235407e4913dc0ec403af7ed524372b612/curve_fit.ipynb
    """
    N = 10000
    sigma = 10.0
    xn = np.arange(N, dtype=float)
    np.random.seed(10)
    yn = np.random.normal(size=len(xn), scale=sigma)

    a = Parameter()
    y = Variable()
    model = {y: a}

    fit = Fit(model, y=yn, sigma_y=sigma)
    fit_result = fit.execute()

    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn,
                           sigma=sigma, absolute_sigma=True)
    self.assertAlmostEqual(fit_result.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result.stdev(a), np.sqrt(np.diag(pcov))[0], 2)

    fit_no_sigma = Fit(model, yn)
    fit_result_no_sigma = fit_no_sigma.execute()

    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn)
    # With or without sigma, the best-fit params should be in agreement
    # in case of equal weights.
    self.assertAlmostEqual(fit_result.value(a), fit_result_no_sigma.value(a), 5)
    # Since symfit is all about absolute errors, the sigma will not be in
    # agreement.
    self.assertNotEqual(fit_result.stdev(a), fit_result_no_sigma.stdev(a), 5)
    self.assertAlmostEqual(fit_result_no_sigma.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result_no_sigma.stdev(a), pcov[0][0]**0.5, 5)

    # Analytical answer for mean of N(0,1):
    mu = 0.0
    sigma_mu = sigma/N**0.5
    self.assertAlmostEqual(fit_result.stdev(a), sigma_mu, 5)
def test_CallableNumericalModel2D():
    """
    Apply a CallableNumericalModel to 2D data, to see if it is agnostic to
    data shape.
    """
    shape = (30, 40)

    def function(a, b):
        out = np.ones(shape) * a
        out[15:, :] += b
        return out

    a, b = parameters('a, b')
    y, = variables('y')

    model = CallableNumericalModel({y: function},
                                   connectivity_mapping={y: {a, b}})
    data = 15 * np.ones(shape)
    data[15:, :] += 20

    fit = Fit(model, y=data)
    fit_result = fit.execute()

    assert fit_result.value(a) == pytest.approx(15)
    assert fit_result.value(b) == pytest.approx(20)

    def flattened_function(a, b):
        out = np.ones(shape) * a
        out[15:, :] += b
        return out.flatten()

    model = CallableNumericalModel({y: flattened_function},
                                   connectivity_mapping={y: {a, b}})
    data = 15 * np.ones(shape)
    data[15:, :] += 20
    data = data.flatten()

    fit = Fit(model, y=data)
    flat_result = fit.execute()

    assert fit_result.value(a) == pytest.approx(flat_result.value(a))
    assert fit_result.value(b) == pytest.approx(flat_result.value(b))

    assert fit_result.stdev(a) is None and flat_result.stdev(a) is None
    assert fit_result.stdev(b) is None and flat_result.stdev(b) is None

    assert fit_result.r_squared == pytest.approx(flat_result.r_squared)
def test_constrainedminimizers():
    """
    Compare the different constrained minimizers, to make sure all support
    constraints, and converge to the same answer.
    """
    minimizers = list(subclasses(ScipyConstrainedMinimize))
    x = Parameter('x', value=-1.0)
    y = Parameter('y', value=1.0)
    z = Variable('z')
    model = Model({z: 2 * x * y + 2 * x - x**2 - 2 * y**2})

    # First we try an unconstrained fit
    results = []
    for minimizer in minimizers:
        fit = Fit(-model, minimizer=minimizer)
        assert isinstance(fit.objective, MinimizeModel)
        fit_result = fit.execute(tol=1e-15)
        results.append(fit_result)

    # Compare the parameter values.
    for r1, r2 in zip(results[:-1], results[1:]):
        assert r1.value(x) == pytest.approx(r2.value(x), 1e-6)
        assert r1.value(y) == pytest.approx(r2.value(y), 1e-6)
        assert r1.covariance_matrix == pytest.approx(r2.covariance_matrix)

    constraints = [
        Ge(y - 1, 0),     # y - 1 >= 0,
        Eq(x**3 - y, 0),  # x**3 - y == 0,
    ]

    # Constrained fit.
    results = []
    for minimizer in minimizers:
        if minimizer is COBYLA:
            # COBYLA only supports inequality constraints.
            continue
        fit = Fit(-model, constraints=constraints, minimizer=minimizer)
        fit_result = fit.execute(tol=1e-15)
        results.append(fit_result)

    for r1, r2 in zip(results[:-1], results[1:]):
        assert r1.value(x) == pytest.approx(r2.value(x), 1e-6)
        assert r1.value(y) == pytest.approx(r2.value(y), 1e-6)
        assert r1.covariance_matrix == pytest.approx(r2.covariance_matrix)
def test_error_analytical(self):
    """
    Test using a case where the analytical answer is known. Uses both
    symfit and scipy's curve_fit.
    Modeled after:
    http://nbviewer.ipython.org/urls/gist.github.com/taldcroft/5014170/raw/31e29e235407e4913dc0ec403af7ed524372b612/curve_fit.ipynb
    """
    N = 10000
    sigma = 10.0 * np.ones(N)
    xn = np.arange(N, dtype=float)
    np.random.seed(10)
    yn = np.random.normal(size=len(xn), scale=sigma)

    a = Parameter()
    y = Variable('y')
    model = {y: a}

    fit = Fit(model, y=yn, sigma_y=sigma)
    fit_result = fit.execute()

    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn,
                           sigma=sigma, absolute_sigma=True)
    self.assertAlmostEqual(fit_result.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result.stdev(a), np.sqrt(np.diag(pcov))[0], 2)

    fit_no_sigma = Fit(model, yn)
    fit_result_no_sigma = fit_no_sigma.execute()

    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn)
    # With or without sigma, the best-fit params should be in agreement
    # in case of equal weights.
    self.assertAlmostEqual(fit_result.value(a), fit_result_no_sigma.value(a), 5)
    # Since symfit is all about absolute errors, the sigma will not be in
    # agreement.
    self.assertNotEqual(fit_result.stdev(a), fit_result_no_sigma.stdev(a), 5)
    self.assertAlmostEqual(fit_result_no_sigma.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result_no_sigma.stdev(a), pcov[0][0]**0.5, 5)

    # Analytical answer for mean of N(0,1):
    mu = 0.0
    sigma_mu = sigma[0]/N**0.5
    self.assertAlmostEqual(fit_result.stdev(a), sigma_mu, 5)
def fit_kinetics(t, d, model, chisq_thd=100):
    """
    Fit time kinetics with two time components and corresponding relative
    amplitude.

    Parameters
    ----------
    t : :class:`~numpy.ndarray`
        Array of time points
    d : :class:`~numpy.ndarray`
        Array of uptake values
    model : :class:`~pyhdx.fit_models.KineticsModel`
    chisq_thd : :obj:`float`
        Threshold chi squared above which the fitting is repeated with the
        Differential Evolution algorithm.

    Returns
    -------
    res : :class:`~symfit.FitResults`
        Symfit FitResults object.
    """
    if np.any(np.isnan(d)):
        raise ValueError("There shouldn't be NaNs anymore")
        # Unreachable fallback: the raise above exits first.
        er = EmptyResult(np.nan, {p.name: np.nan for p in model.sf_model.params})
        return er

    model.initial_guess(t, d)
    with temporary_seed(43):
        fit = Fit(model.sf_model, t, d, minimizer=Powell)
        res = fit.execute()

        if (not check_bounds(res)
                or np.any(np.isnan(list(res.params.values())))
                or res.chi_squared > chisq_thd):
            fit = Fit(model.sf_model, t, d, minimizer=DifferentialEvolution)
            # grid = model.initial_grid(t, d, step=5)
            res = fit.execute()

    return res
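# A minimal, hypothetical usage sketch for fit_kinetics. The model object is
# an assumption: any pyhdx-style KineticsModel subclass exposing sf_model,
# initial_guess and params should work; t and d are synthetic here.
import numpy as np

t = np.logspace(-2, 2, 20)       # time points
d = 1 - np.exp(-0.5 * t)         # synthetic uptake curve, contains no NaNs
model = SomeKineticsModel()      # hypothetical KineticsModel subclass
res = fit_kinetics(t, d, model, chisq_thd=100)
print(res.params)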
def test_likelihood_fitting_exponential():
    """
    Fit using the likelihood method.
    """
    b = Parameter('b', value=4, min=3.0)
    x, y = variables('x, y')
    pdf = {y: Exp(x, 1 / b)}

    # Draw points from an Exp(5) exponential distribution.
    np.random.seed(100)
    # TODO: Do we *really* need 1m points?
    xdata = np.random.exponential(5, 1000000)

    # Expected parameter values
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev / np.sqrt(len(xdata))

    with pytest.raises(TypeError):
        fit = Fit(pdf, x=xdata, sigma_y=2.0, objective=LogLikelihood)

    fit = Fit(pdf, xdata, objective=LogLikelihood)
    fit_result = fit.execute()
    pdf_i = fit.model(x=xdata, **fit_result.params).y  # probabilities
    likelihood = np.prod(pdf_i)
    loglikelihood = np.sum(np.log(pdf_i))

    assert fit_result.value(b) == pytest.approx(mean, 1e-3)
    assert fit_result.value(b) == pytest.approx(stdev, 1e-3)
    assert fit_result.stdev(b) == pytest.approx(mean_stdev, 1e-3)
    assert likelihood == pytest.approx(fit_result.likelihood)
    assert loglikelihood == pytest.approx(fit_result.log_likelihood)
def test_error_advanced(self):
    """
    Compare the error propagation of Fit against NumericalLeastSquares.
    Models an example from the mathematica docs and tries to replicate it:
    http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html
    """
    data = [
        [0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6],
        [1., 2.2, 5.9], [1.8, 2.4, 7.2], [9., 1.7, 7.],
        [7.9, 8., 10.4], [4.9, 3.9, 9.], [2.3, 2.6, 7.4],
        [4.7, 8.4, 10.]
    ]
    xdata, ydata, zdata = [np.array(data) for data in zip(*data)]
    # errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2])

    a = Parameter('a', 3.0)
    b = Parameter('b', 0.9)
    c = Parameter('c', 5.0)
    x = Variable('x')
    y = Variable('y')
    z = Variable('z')
    model = {z: a * log(b * x + c * y)}

    const_fit = Fit(model, xdata, ydata, zdata, absolute_sigma=False)
    const_result = const_fit.execute()
    fit = Fit(model, xdata, ydata, zdata, absolute_sigma=False,
              minimizer=MINPACK)
    std_result = fit.execute()

    self.assertEqual(const_fit.absolute_sigma, fit.absolute_sigma)
    self.assertAlmostEqual(const_result.value(a), std_result.value(a), 4)
    self.assertAlmostEqual(const_result.value(b), std_result.value(b), 4)
    self.assertAlmostEqual(const_result.value(c), std_result.value(c), 4)
    self.assertAlmostEqual(const_result.stdev(a), std_result.stdev(a), 4)
    self.assertAlmostEqual(const_result.stdev(b), std_result.stdev(b), 4)
    self.assertAlmostEqual(const_result.stdev(c), std_result.stdev(c), 4)
def test_vector_none_fitting(self):
    """
    Fit to a 3 component vector valued function with one variable's data
    set to None, without bounds or guesses.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit_none = Fit(
        model=model,
        a_i=xdata[0],
        b_i=xdata[1],
        c_i=None,
        minimizer=MINPACK
    )
    fit = Fit(
        model=model,
        a_i=xdata[0],
        b_i=xdata[1],
        c_i=xdata[2],
        minimizer=MINPACK
    )
    fit_none_result = fit_none.execute()
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_none_result.value(a), fit_result.value(a), 4)
    self.assertAlmostEqual(fit_none_result.value(b), fit_result.value(b), 4)
    # The parameter without data should be unchanged.
    self.assertAlmostEqual(fit_none_result.value(c), 1.0)
def test_single_param_model(self):
    """
    Added after #161, this tests if models with a single additive parameter
    are fitted properly. The problem with these models is that their
    jacobian is in principle just the scalar 1, which does not have the
    correct shape. No news is good news.
    """
    T = Variable('T')
    l = Variable('l')
    s = Parameter('s', value=300)
    a = Parameter('a', value=300)
    model = {l: s + a + 1 / (1 + exp(- T))}

    temp_data = [270, 280, 285, 290, 295, 300, 310, 320]
    length_data = [8.33, 8.41, 8.45, 8.5, 8.54, 9.13, 9.27, 9.4]
    fit = Fit(model, l=length_data, T=temp_data)
    fit_result = fit.execute()

    # Raise the stakes by increasing the dimensionality of the data
    TT, LL = np.meshgrid(temp_data, length_data)
    fit = Fit(model, l=LL, T=TT)
    fit_result = fit.execute()
def test_data_for_constraint():
    """
    Test the signature handling when constraints are at play. Constraints
    should take separate data, but kwargs that are found in neither the
    model nor the constraints should still raise an error.
    """
    A, mu, sig = parameters('A, mu, sig')
    x, y, Y = variables('x, y, Y')

    model = Model({y: A * Gaussian(x, mu=mu, sig=sig)})
    constraint = Model.as_constraint(Y, model, constraint_type=Eq)

    np.random.seed(2)
    xdata = np.random.normal(1.2, 2, 10)
    ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))),
                                 density=True)

    # Allowed
    fit = Fit(model, x=xdata, y=ydata, Y=2, constraints=[constraint])
    assert isinstance(fit.objective, LeastSquares)
    assert isinstance(fit.minimizer.constraints[0], MinimizeModel)
    fit = Fit(model, x=xdata, y=ydata)
    assert isinstance(fit.objective, LeastSquares)
    fit = Fit(model, x=xdata, objective=LogLikelihood)
    assert isinstance(fit.objective, LogLikelihood)

    # Not allowed
    with pytest.raises(TypeError):
        fit = Fit(model, x=xdata, y=ydata, Y=2)

    with pytest.raises(TypeError):
        fit = Fit(model, x=xdata, y=ydata, Y=2, Z=3, constraints=[constraint])

    with pytest.raises(TypeError):
        fit = Fit(model, x=xdata, y=ydata, objective=LogLikelihood)
def test_vector_fitting():
    """
    Test Fit's minimizer selection in the presence of bounds or
    constraints: SLSQP should be selected when constraints are provided,
    L-BFGS-B when parameters are bounded, and plain BFGS (or an explicitly
    requested minimizer such as MINPACK) otherwise. This is checked for
    both scalar and vector models.
    """
    a, b = parameters('a, b')
    a_i, = variables('a_i')

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    # Make a new scalar model.
    scalar_model = {a_i: a + b}
    simple_fit = Fit(model=scalar_model, a_i=xdata[0], minimizer=MINPACK)
    assert isinstance(simple_fit.minimizer, MINPACK)

    constrained_fit = Fit(model=scalar_model, a_i=xdata[0],
                          constraints=[Equality(a + b, 110)])
    assert isinstance(constrained_fit.minimizer, SLSQP)

    a.min = 0
    a.max = 25
    a.value = 10
    b.min = 80
    b.max = 120
    b.value = 100
    bound_fit = Fit(
        model=scalar_model,
        a_i=xdata[0],
    )
    assert isinstance(bound_fit.minimizer, LBFGSB)

    # Repeat all of the above for the Vector model
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    model = {a_i: a, b_i: b, c_i: c}

    simple_fit = Fit(
        model=model,
        a_i=xdata[0],
        b_i=xdata[1],
        c_i=xdata[2],
    )
    assert isinstance(simple_fit.minimizer, BFGS)

    constrained_fit = Fit(model=model, a_i=xdata[0], b_i=xdata[1],
                          c_i=xdata[2],
                          constraints=[Equality(a + b + c, 180)])
    assert isinstance(constrained_fit.minimizer, SLSQP)

    a.min = 0
    a.max = 25
    a.value = 10
    b.min = 80
    b.max = 120
    b.value = 100
    bound_fit = Fit(
        model=model,
        a_i=xdata[0],
        b_i=xdata[1],
        c_i=xdata[2],
    )
    assert isinstance(bound_fit.minimizer, LBFGSB)

    fit_result = bound_fit.execute()
    assert fit_result.value(a) == pytest.approx(np.mean(xdata[0]), rel=1e-6)
    assert fit_result.value(b) == pytest.approx(np.mean(xdata[1]), rel=1e-6)
    assert fit_result.value(c) == pytest.approx(np.mean(xdata[2]), rel=1e-6)
def test_simple_sigma():
    """
    Make sure we produce the same results as scipy's curve_fit, with and
    without sigmas, and compare the results of both to a known value.
    """
    t_data = np.array([1.4, 2.1, 2.6, 3.0, 3.3])
    y_data = np.array([10, 20, 30, 40, 50])

    sigma = 0.2
    n = np.array([5, 3, 8, 15, 30])
    sigma_t = sigma / np.sqrt(n)

    # We now define our model
    y = Variable('y')
    g = Parameter('g')
    t_model = (2 * y / g)**0.5

    fit = Fit(t_model, y_data, t_data)  # , sigma=sigma_t)
    fit_result = fit.execute()

    # h_smooth = np.linspace(0, 60, 100)
    # t_smooth = t_model(y=h_smooth, **fit_result.params)

    # Let's compare with the results from curve_fit, no weights
    popt_noweights, pcov_noweights = curve_fit(
        lambda y, p: (2 * y / p)**0.5, y_data, t_data)

    assert fit_result.value(g) == pytest.approx(popt_noweights[0])
    assert fit_result.stdev(g) == pytest.approx(
        np.sqrt(pcov_noweights[0, 0]), 1e-6)

    # Same sigma everywhere
    fit = Fit(t_model, y_data, t_data, 0.0031, absolute_sigma=False)
    fit_result = fit.execute()
    popt_sameweights, pcov_sameweights = curve_fit(
        lambda y, p: (2 * y / p)**0.5,
        y_data, t_data,
        sigma=0.0031 * np.ones(len(y_data)),
        absolute_sigma=False)

    assert fit_result.value(g) == pytest.approx(popt_sameweights[0], 1e-4)
    assert fit_result.stdev(g) == pytest.approx(
        np.sqrt(pcov_sameweights[0, 0]), 1e-4)
    # Same weight everywhere should be the same as no weight when
    # absolute_sigma=False
    assert fit_result.value(g) == pytest.approx(popt_noweights[0], 1e-4)
    assert fit_result.stdev(g) == pytest.approx(
        np.sqrt(pcov_noweights[0, 0]), 1e-4)

    # Different sigma for every point
    fit = Fit(t_model, y_data, t_data, 0.1 * sigma_t, absolute_sigma=False)
    fit_result = fit.execute()
    popt, pcov = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data,
                           sigma=.1 * sigma_t)

    assert fit_result.value(g) == pytest.approx(popt[0])
    assert fit_result.stdev(g) == pytest.approx(np.sqrt(pcov[0, 0]), 1e-6)

    # According to Mathematica
    assert fit_result.value(g) == pytest.approx(9.095, 1e-3)
    assert fit_result.stdev(g) == pytest.approx(0.102, 1e-2)
"""
Created on Mon Dec  7 11:28:58 2015

@author: peterkroon
"""
from symfit import Variable, Parameter, exp, Fit, Model
from symfit.contrib.interactive_guess import InteractiveGuess
import numpy as np


def distr(x, k, x0):
    kbT = 4.11
    return exp(-k * (x - x0)**2 / kbT)


x = Variable('x')
y = Variable('y')
k = Parameter('k', 900)
x0 = Parameter('x0', 1.5)

model = Model({y: distr(x, k, x0)})

x_data = np.linspace(0, 2.5, 50)
y_data = model(x=x_data, k=1000, x0=1).y

guess = InteractiveGuess(model, x=x_data, y=y_data, n_points=150)
guess.execute()
print(guess)

fit = Fit(model, x=x_data, y=y_data)
fit_result = fit.execute(maxiter=1000)
print(fit_result)
def sd_sim(r0):
    return sdm.run(**config, params={'R0': r0})


func = {
    confirmed: sd_sim(r0)['Confirmed'][x],
    deaths: sd_sim(r0)['Deaths'][x]
}

# specify the data for the variables
xdata = t
ydata = {
    'confirmed': df['confirmed'].to_numpy(),
    'deaths': df['deaths'].to_numpy()
}

print("I'm about to fit")

# optimize the model
fit = Fit(func, x=xdata, **ydata)
fit_result = fit.execute()

# visualize results
confirmed_fit, deaths_fit = fit.model(x=xdata, **fit_result.params)

import matplotlib.pyplot as plt

plt.plot(xdata, confirmed_fit, label='confirmed (fit)')
plt.plot(xdata, ydata['confirmed'], label='confirmed (data)')
plt.legend()
plt.show()

plt.plot(xdata, deaths_fit, label='deaths (fit)')
plt.plot(xdata, ydata['deaths'], label='deaths (data)')
plt.legend()
def fit(data, v=1.0, return_dict=None):
    k = Parameter("k", value=0.06704382317644017)
    # l = Parameter("l", values=1.0)
    # sigma2 = Parameter("sigma2", value=1)
    # mu = Parameter("mu", value=np.max(data) - 1)
    # V = Parameter("V", value=0.99, fixed=True)
    A = Parameter("A", value=0.0)
    B = Parameter("B", value=1.0)
    # C = Parameter("C", value=1.0)
    # D = Parameter("D", value=0.0)
    x = Variable()

    # Earlier model attempts, kept commented out:
    # model = Add(Mul(V, Piecewise((Mul((1 / (2 ** (k / float(2)) * gamma(k / 2))
    #                                    * Pow(Mul(Add(x, -B), 1 / A), (k / 2 - 1))
    #                                    * exp(-Mul(Add(x, -B), 1 / A) / 2)), 1 / A),
    #                               GreaterThan(Add(x, -B), 0)), (1e-09, True))),
    #             Mul(Add(1, -V), Piecewise(((1 / (sqrt(2 * pi * np.abs(sigma2))))
    #                                        * exp(-(x - mu) ** 2 / (2 * np.abs(sigma2))),
    #                                        GreaterThan(Add(x, -B), 0)), (1e-09, True))))

    # model = Add(Mul(V, Piecewise((Mul(Mul(1 / k, exp(-Mul(Mul(Add(x, -B), 1 / A), 1 / k))), 1 / A),
    #                               GreaterThan(Add(x, -B), 0)), (1e-09, True))),
    #             Mul(Add(1, -V), Piecewise(((1 / (sqrt(2 * pi * np.abs(sigma2))))
    #                                        * exp(-(x - mu) ** 2 / (2 * np.abs(sigma2))),
    #                                        GreaterThan(Add(x, -B), 0)), (1e-09, True))))

    # model = Add(Mul(V, Piecewise(
    #     (exp(-(1 - k * Mul(Add(x, -B), 1 / A)) ** (1 / k))
    #      * (1 - k * Mul(Add(x, -B), 1 / A)) ** (1 / k - Mul(Add(x, -B), 1 / A)),
    #      GreaterThan(k, 0)),
    #     (Mul(exp(-exp(Mul(Add(x, -B), 1 / A))), exp(Mul(Add(x, -B), 1 / A))), True))),
    #     Mul(V, Piecewise(
    #         (exp(-(1 - k * Mul(Add(x, -B), 1 / A)) ** (1 / k))
    #          * (1 - k * Mul(Add(x, -B), 1 / A)) ** (1 / k - Mul(Add(x, -B), 1 / A)),
    #          GreaterThan(k, 0)),
    #         (Mul(exp(-exp(Mul(Add(x, -B), 1 / A))), exp(Mul(Add(x, -B), 1 / A))), True))))

    # model = Piecewise(
    #     ((exp(-(1 - k * Mul(Add(x, -B), 1 / A)) ** (1 / k)),
    #       (1 - k * Mul(Add(x, -B), 1 / A)) ** (1 / k - Mul(Add(x, -B), 1 / A))),
    #      GreaterThan(k, 0)),
    #     (Mul(exp(-exp(Mul(Add(x, -B), 1 / A))), exp(Mul(Add(x, -B), 1 / A))), True))

    # model = Mul(exp(-exp(Mul(Add(x, -B), 1 / A))), exp(Mul(Add(x, -B), 1 / A)))

    # model = Add(Mul(V, Piecewise((Mul(Mul(exp(-Pow((1 - Mul(k, Mul(Add(x, -B), 1 / A))), 1 / k)),
    #                                       Pow((1 - Mul(k, Mul(Add(x, -B), 1 / A))), (1 / k - 1))), 1 / A),
    #                               GreaterThan(1 / k, Mul(Add(x, -B), 1 / A))),
    #                              (1e-09, True))),
    #             Mul(1 - V, Piecewise((Mul(Mul(exp(-Pow((1 - Mul(k, Mul(Add(x, -D), 1 / C))), 1 / l)),
    #                                           Pow((1 - Mul(l, Mul(Add(x, -D), 1 / C))), (1 / k - 1))), 1 / C),
    #                                   GreaterThan(1 / k, Mul(Add(x, -D), 1 / C))),
    #                                  (1e-09, True))))

    # Shifted/scaled piecewise density; falls back to 1e-09 outside the support.
    model = Piecewise((Mul(Mul(exp(-Pow((1 - Mul(k, Mul(Add(x, -B), 1 / A))), 1 / k)),
                               Pow((1 - Mul(k, Mul(Add(x, -B), 1 / A))), (1 / k - 1))), 1 / A),
                       GreaterThan(1 / k, Mul(Add(x, -B), 1 / A))),
                      (1e-09, True))

    # Do the fitting!
    fit = Fit(model, data, objective=LogLikelihood, constraints=[
        GreaterThan(A, 0.1),
        GreaterThan(20, A),
        GreaterThan(B, 0),
        GreaterThan(20, B),
        # GreaterThan(C, 0.01),
        # GreaterThan(20, C),
        # GreaterThan(D, -1),
        # GreaterThan(13, D),
        # GreaterThan(mu, 5),
        # GreaterThan(np.max(data), mu),
        # GreaterThan(sigma2, 0.5),
        # GreaterThan(5, sigma2),
        GreaterThan(k, -10.0),
        GreaterThan(10.0, k),  # 2
        # GreaterThan(l, 0.01),
        # GreaterThan(10.0, l),  # 2
        # GreaterThan(1.0, V),
        # GreaterThan(V, 0.01),  # 0.00001
    ])

    fit_result = None
    try:
        fit_result = fit.execute(tol=1e-09)
    except ValueError as e:
        print(e)
        return None

    if return_dict is not None:
        return_dict.append((model, fit_result.params,
                            fit_result.gof_qualifiers["objective_value"]))

    return (model, fit_result.params,
            fit_result.gof_qualifiers["objective_value"])
class CellFit(object):
    # Default functions to use for given data classes.
    defaults = {
        'binary': CellBinaryFunction,
        'storm': CellSTORMMembraneFunction,
        'brightfield': CellImageFunction,
        'fluorescence': CellImageFunction,
    }

    def __init__(self, cell_obj, data_name='binary', cell_function=None,
                 minimizer=Powell, **kwargs):
        self.cell_obj = cell_obj
        self.data_name = data_name
        self.minimizer = minimizer
        self.kwargs = kwargs

        # todo: the self.data_elem value changes when self.cell_function
        # is defined!
        self.dclass = self.data_elem.dclass
        func_klass = self.defaults[self.dclass] if not cell_function else cell_function

        # todo: check custom function
        if issubclass(func_klass, CellMinimizeFunctionBase):
            self.cell_function = func_klass(self.cell_obj, data_name)
        elif callable(func_klass):
            self.cell_function = func_klass
        else:
            raise TypeError("Invalid type for cell_function keyword argument.")

        self.model = NumericalCellModel(cell_obj, self.cell_function)

        if self.dclass == 'storm':
            data_elem = self.cell_obj.data.data_dict[self.data_name]
            r_mean = self.cell_obj.coords.calc_rc(data_elem['x'],
                                                  data_elem['y']).mean()
            idx = [p.name for p in self.model.params].index('r')
            self.model.params[idx].value = r_mean
            self.model.params[idx].min = 0.8 * r_mean
            self.model.params[idx].max = 1.2 * r_mean

        self.fit = Fit(self.model, self.data_elem, minimizer=minimizer, **kwargs)

    def renew_fit(self):
        self.fit = Fit(self.model, self.data_elem, minimizer=self.minimizer,
                       **self.kwargs)

    def execute(self, **kwargs):
        return self.fit.execute(**kwargs)

    def fit_parameters(self, parameters, **kwargs):
        with set_params(self.fit, parameters):
            self.renew_fit()
            res = self.execute(**kwargs)
            for k, v in res.params.items():
                i = [par.name for par in self.model.params].index(k)
                self.model.params[i].value = v

        self.model.cell_obj.coords.sub_par(res.params)
        return res

    def execute_stepwise(self, **kwargs):
        i = 0
        j = 0
        prev_val = 0

        imax = kwargs.get('imax', 3)
        jmax = kwargs.get('jmax', 5)

        assert imax > 0
        assert jmax > 0
        while i < imax and j < jmax:
            # todo: checking and testing
            j += 1
            res = self.fit_parameters('r', **kwargs)
            res = self.fit_parameters('xl xr', **kwargs)
            res = self.fit_parameters('a0 a1 a2', **kwargs)
            print('Current minimize value: {}'.format(res.objective_value))
            if prev_val == res.objective_value:
                i += 1
            prev_val = res.objective_value

        return res

    @property
    def data_elem(self):
        try:
            return self.cell_function.target_data
        except AttributeError:
            return self.cell_obj.data.data_dict[self.data_name]
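# Minimal usage sketch for CellFit, assuming a colicoords-style `cell` object
# whose data dictionary provides one of the data classes listed in
# CellFit.defaults ('binary' here).
cell_fit = CellFit(cell, data_name='binary')
res = cell_fit.execute_stepwise()  # alternates fitting r, xl/xr and a0/a1/a2
print(res.objective_value)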
def calibrate(self, corr_type="gamma_corr"):
    """
    Get the calibration matrix.

    :param corr_type: Type of fit/correction. Default is gamma correction.
    :return: Calibration matrix.
    """
    if corr_type != "gamma_corr":
        # here could be settings for other fit types
        raise ValueError(
            'Correction type {} is not recognized. '
            'Possible values are: "gamma_corr"'.format(corr_type))
    self.corr_type = corr_type

    # fit variables
    r, g, b, l, m, s = variables('r, g, b, l, m, s')

    # define models
    model = None
    if self.corr_type == "gamma_corr":
        a_0l, a_0m, a_0s = parameters('a_0l, a_0m, a_0s', min=0.0, value=0.0)
        a_lr, a_lg, a_lb, a_mr, a_mg, a_mb, a_sr, a_sg, a_sb = \
            parameters('a_lr, a_lg, a_lb, a_mr, a_mg, a_mb, '
                       'a_sr, a_sg, a_sb', min=0.0, value=1.0)
        gamma_r, gamma_g, gamma_b = parameters('gamma_r, gamma_g, gamma_b',
                                               value=1.5)

        model = Model({
            l: a_0l + a_lr * r**gamma_r + a_lg * g**gamma_g + a_lb * b**gamma_b,
            m: a_0m + a_mr * r**gamma_r + a_mg * g**gamma_g + a_mb * b**gamma_b,
            s: a_0s + a_sr * r**gamma_r + a_sg * g**gamma_g + a_sb * b**gamma_b,
        })

    # get values for variables
    r, g, b = self._rgb_mat
    l, m, s = self._lms_mat

    # avoid division by zero errors
    min_val = 0.00000001
    r[r == 0] = min_val
    g[g == 0] = min_val
    b[b == 0] = min_val

    fit = Fit(model, r=r, g=g, b=b, l=l, m=m, s=s)
    fit_res = fit.execute()
    p_r = fit_res.params

    cm = np.ones(1)
    if self.corr_type == "gamma_corr":
        cm = np.asarray([[p_r["a_0l"], p_r["a_0m"], p_r["a_0s"]],
                         [p_r["a_lr"], p_r["a_lg"], p_r["a_lb"]],
                         [p_r["a_mr"], p_r["a_mg"], p_r["a_mb"]],
                         [p_r["a_sr"], p_r["a_sg"], p_r["a_sb"]],
                         [p_r["gamma_r"], p_r["gamma_g"], p_r["gamma_b"]]])
    self.calibration_matrix = cm

    inv_mat = np.zeros((5, 3))
    inv_mat[0] = cm[0]
    inv_mat[1:4] = np.linalg.inv(cm[1:4])
    inv_mat[4] = np.asarray([1. / cm[4][0], 1. / cm[4][1], 1. / cm[4][2]])
    self.inv_calibration_matrix = inv_mat
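# Hypothetical usage sketch; assumes `cal` is an instance of the surrounding
# class, already holding measured _rgb_mat / _lms_mat data.
cal.calibrate(corr_type="gamma_corr")
print(cal.calibration_matrix)      # rows: offsets, 3x3 linear part, gammas
print(cal.inv_calibration_matrix)  # inverted linear part, reciprocal gammas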
def test_error_advanced(self):
    """
    Models an example from the mathematica docs and tries to replicate it
    using both symfit and scipy's curve_fit.
    http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html
    """
    data = [
        [0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6],
        [1., 2.2, 5.9], [1.8, 2.4, 7.2], [9., 1.7, 7.],
        [7.9, 8., 10.4], [4.9, 3.9, 9.], [2.3, 2.6, 7.4],
        [4.7, 8.4, 10.]
    ]
    xdata, ydata, zdata = [np.array(data) for data in zip(*data)]
    xy = np.vstack((xdata, ydata))
    errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2])

    a = Parameter(value=3.0)
    b = Parameter(value=0.9)
    c = Parameter(value=5)
    x = Variable('x')
    y = Variable('y')
    z = Variable('z')
    model = {z: a * log(b * x + c * y)}

    # fit = Fit(model, xy, z, absolute_sigma=False)
    fit = Fit(model, xdata, ydata, zdata, absolute_sigma=False)
    # fit = Fit(model, x=xdata, y=ydata, z=zdata, absolute_sigma=False)
    fit_result = fit.execute()

    # Same as Mathematica default behavior.
    self.assertAlmostEqual(fit_result.value(a), 2.9956, 4)
    self.assertAlmostEqual(fit_result.value(b), 0.563212, 4)
    self.assertAlmostEqual(fit_result.value(c), 3.59732, 4)
    self.assertAlmostEqual(fit_result.stdev(a), 0.278304, 4)
    self.assertAlmostEqual(fit_result.stdev(b), 0.224107, 4)
    self.assertAlmostEqual(fit_result.stdev(c), 0.980352, 4)

    fit = Fit(model, xdata, ydata, zdata, absolute_sigma=True)
    fit_result = fit.execute()
    # Same as Mathematica in Measurement error mode, but without supplying
    # any errors.
    self.assertAlmostEqual(fit_result.value(a), 2.9956, 4)
    self.assertAlmostEqual(fit_result.value(b), 0.563212, 4)
    self.assertAlmostEqual(fit_result.value(c), 3.59732, 4)
    self.assertAlmostEqual(fit_result.stdev(a), 0.643259, 4)
    self.assertAlmostEqual(fit_result.stdev(b), 0.517992, 4)
    self.assertAlmostEqual(fit_result.stdev(c), 2.26594, 4)

    fit = Fit(model, xdata, ydata, zdata, sigma_z=errors)
    fit_result = fit.execute()

    popt, pcov, infodict, errmsg, ier = curve_fit(
        lambda x_vec, a, b, c: a * np.log(b * x_vec[0] + c * x_vec[1]),
        xy, zdata, sigma=errors, absolute_sigma=True, full_output=True)

    # Same as curve_fit?
    self.assertAlmostEqual(fit_result.value(a), popt[0], 4)
    self.assertAlmostEqual(fit_result.value(b), popt[1], 4)
    self.assertAlmostEqual(fit_result.value(c), popt[2], 4)
    self.assertAlmostEqual(fit_result.stdev(a), np.sqrt(pcov[0, 0]), 4)
    self.assertAlmostEqual(fit_result.stdev(b), np.sqrt(pcov[1, 1]), 4)
    self.assertAlmostEqual(fit_result.stdev(c), np.sqrt(pcov[2, 2]), 4)

    # Same as Mathematica with MEASUREMENT ERROR
    self.assertAlmostEqual(fit_result.value(a), 2.68807, 4)
    self.assertAlmostEqual(fit_result.value(b), 0.941344, 4)
    self.assertAlmostEqual(fit_result.value(c), 5.01541, 4)
    self.assertAlmostEqual(fit_result.stdev(a), 0.0974628, 4)
    self.assertAlmostEqual(fit_result.stdev(b), 0.247018, 4)
    self.assertAlmostEqual(fit_result.stdev(c), 0.597661, 4)
from __future__ import print_function
from symfit import Parameter, Variable, Fit, exp
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

palette = sns.color_palette()

x = Variable()
A = Parameter()
sig = Parameter(name='sig', value=1.4, min=1.0, max=2.0)
x0 = Parameter(name='x0', value=15.0, min=0.0)

# Gaussian distribution
model = A*exp(-((x - x0)**2/(2 * sig**2)))

# Sample 10000 points from a N(15.0, 1.5) distribution
sample = np.random.normal(loc=15.0, scale=1.5, size=(10000,))
ydata, bin_edges = np.histogram(sample, 100)
xdata = (bin_edges[1:] + bin_edges[:-1])/2

fit = Fit(model, xdata, ydata)
fit_result = fit.execute()
print(fit_result)
print(model)

y = model(x=xdata, **fit_result.params)
sns.regplot(xdata, ydata, fit_reg=False)
plt.plot(xdata, y, color=palette[2])
plt.ylim(0, 400)
plt.show()
def test_straight_line_analytical(self):
    """
    Test symfit against a straight line, for which the parameters and
    their uncertainties are known analytically. Assuming equal weights.
    """
    data = [[0, 1], [1, 0], [3, 2], [5, 4]]
    xdata, ydata = (np.array(i, dtype='float64') for i in zip(*data))

    mean_squared_x = np.mean(xdata**2) - np.mean(xdata)**2
    mean_xy = np.mean(xdata * ydata) - np.mean(xdata) * np.mean(ydata)
    a = mean_xy / mean_squared_x
    b = ydata.mean() - a * xdata.mean()
    self.assertAlmostEqual(a, 0.694915, 6)  # values from Mathematica
    self.assertAlmostEqual(b, 0.186441, 6)

    S = np.sum((ydata - (a * xdata + b))**2)
    var_a_exact = S / (len(xdata) * (len(xdata) - 2) * mean_squared_x)
    var_b_exact = var_a_exact * np.mean(xdata**2)
    a_exact = a
    b_exact = b

    # We will now compare these exact results with values from symfit,
    # numerically
    a, b = parameters('a, b')
    x, y = variables('x, y')
    model = {y: a * x + b}
    fit = Fit(model, x=xdata, y=ydata, minimizer=MINPACK)
    fit_result = fit.execute()

    popt, pcov = curve_fit(lambda z, c, d: c * z + d, xdata, ydata,
                           jac=lambda z, c, d: np.transpose(
                               [z, np.ones_like(z)]))

    # curve_fit
    self.assertAlmostEqual(a_exact, popt[0], 4)
    self.assertAlmostEqual(b_exact, popt[1], 4)
    self.assertAlmostEqual(var_a_exact, pcov[0][0], 6)
    self.assertAlmostEqual(var_b_exact, pcov[1][1], 6)

    self.assertAlmostEqual(a_exact, fit_result.value(a), 4)
    self.assertAlmostEqual(b_exact, fit_result.value(b), 4)
    self.assertAlmostEqual(var_a_exact, fit_result.variance(a), 6)
    self.assertAlmostEqual(var_b_exact, fit_result.variance(b), 6)

    # Do the fit with the LinearLeastSquares object
    fit = LinearLeastSquares(model, x=xdata, y=ydata)
    fit_result = fit.execute()
    self.assertAlmostEqual(a_exact, fit_result.value(a), 4)
    self.assertAlmostEqual(b_exact, fit_result.value(b), 4)
    self.assertAlmostEqual(var_a_exact, fit_result.variance(a), 6)
    self.assertAlmostEqual(var_b_exact, fit_result.variance(b), 6)

    # Let's also make sure the entire covariance matrix is the same
    for cov1, cov2 in zip(fit_result.covariance_matrix.flatten(),
                          pcov.flatten()):
        self.assertAlmostEqual(cov1, cov2)
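For reference, the closed-form expressions this test implements (matching `mean_xy`, `mean_squared_x`, `S`, `var_a_exact` and `var_b_exact` above) are the standard equal-weight least-squares results:

\hat{a} = \frac{\overline{xy} - \bar{x}\,\bar{y}}{\overline{x^2} - \bar{x}^2},
\qquad
\hat{b} = \bar{y} - \hat{a}\,\bar{x},
\qquad
S = \sum_{i=1}^{n} \bigl(y_i - \hat{a} x_i - \hat{b}\bigr)^2,

\operatorname{var}(\hat{a}) = \frac{S}{n\,(n-2)\,\bigl(\overline{x^2} - \bar{x}^2\bigr)},
\qquad
\operatorname{var}(\hat{b}) = \operatorname{var}(\hat{a})\,\overline{x^2}.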
import numpy as np
import pandas as pd
import sympy as sp
import matplotlib.pyplot as plt
from matplotlib import ticker
from scipy.integrate import quad
from sympy import Symbol
from symfit import Fit, Parameter, parameters, variables


class Fourier:
    """Fits data with a Fourier series."""

    def __init__(self, file_name, y_label=None, revenue_goal=None,
                 freq=None, number_of_terms=5):
        # `file_name` may be a path to an Excel file or a list of raw values.
        if isinstance(file_name, str):
            df = pd.read_excel(file_name)
            ydata = df['{}'.format(y_label)].values
        elif isinstance(file_name, list):
            ydata = np.asarray(file_name)
        if len(ydata) == 3:
            # Three values are expanded into a step function on [0, 1].
            self.x = np.linspace(0, 1, 1000)
            y = np.full_like(self.x, ydata[0])
            y[self.x > 0.333] = ydata[1]
            y[self.x > 0.666] = ydata[2]
            self.y = y
        else:
            self.y = ydata
            self.x = np.linspace(0, 1, len(ydata))
        self.label = y_label
        self.revenueGoal = revenue_goal
        self.n = number_of_terms
        if freq is not None:
            self.w = freq
        else:
            self.w = Parameter('w', value=2 * np.pi)

    def fourier(self):
        n = self.n
        w = self.w
        lst = range(n + 1)
        self.a_n = parameters(','.join(['a{}'.format(i) for i in lst]))
        self.b_n = parameters(','.join(['b{}'.format(i) for i in lst]))
        self.coeff = self.a_n + self.b_n
        # Note: b0 multiplies sin(0) == 0, so it is never constrained by
        # the fit; it is kept only so a_n and b_n have the same length.
        self.eqn = sum([ai * sp.cos(k * w * Symbol('x'))
                        + bi * sp.sin(k * w * Symbol('x'))
                        for k, (ai, bi) in enumerate(zip(self.a_n, self.b_n))])
        return self.eqn

    def fit(self):
        x, y = variables('x, y')
        model_dict = {y: self.fourier()}
        self.ffit = Fit(model_dict, x=self.x, y=self.y)
        self.fit_result = self.ffit.execute()
        self.orderedDict = self.fit_result.params
        return self.fit_result.params

    def fitFunc(self):
        # Substitute the fitted coefficient values into the symbolic series.
        self.fiteqn = self.eqn
        for k, v in self.orderedDict.items():
            self.fiteqn = self.fiteqn.subs(Parameter('{}'.format(k)), v)
        return self.fiteqn

    def fFunc(self, x):
        """Function for plugging into distConst to get constant c."""
        return self.fiteqn.subs(Symbol('x'), x)

    def adjustFunc(self):
        # Scale all coefficients so the integral over [0, 1] hits the goal.
        integral = quad(self.fFunc, 0, 1)
        c = self.revenueGoal / integral[0]
        self.orderedDict.update((k, v * c) for k, v in self.orderedDict.items())
        self.adjeqn = self.eqn
        for k, v in self.orderedDict.items():
            self.adjeqn = self.adjeqn.subs(Parameter('{}'.format(k)), v)
        print(self.orderedDict)
        print(self.adjeqn)
        return self.adjeqn

    def adjFunc(self, x):
        """Function for plugging into AttainmentCalc."""
        return self.adjeqn.subs(Symbol('x'), x)

    def fitPlot(self, plot_data=True, color='red'):
        if plot_data:
            # plots the line that is being fit
            plt.plot(self.x, self.y, lw=3, alpha=0.7,
                     label=self.label, color=color)
        plt.plot(self.x, self.ffit.model(self.x, **self.fit_result.params).y,
                 color='red', ls='--', label='_nolegend_')
        formatter = ticker.StrMethodFormatter('{x:,.0f}')
        plt.gca().yaxis.set_major_formatter(formatter)
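A minimal usage sketch of the class above. The three-point input and the goal value are made up for illustration, and matplotlib is assumed to be imported as `plt` as in the class itself:

# Fit a step function built from three made-up values, then rescale the
# fitted series so its integral over [0, 1] matches a hypothetical goal.
fourier = Fourier([120.0, 300.0, 180.0], y_label='revenue',
                  revenue_goal=250.0, number_of_terms=5)
fourier.fit()        # runs the symfit Fit and stores the parameters
fourier.fitFunc()    # substitute the fitted values into the symbolic series
adjusted = fourier.adjustFunc()
print(adjusted)
fourier.fitPlot(color='steelblue')
plt.show()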
def square(kABval=1e-2, kACval=1e-2, kBDval=1e-2, kCDval=1e-2, kBCval=1e-2,
           kADval=1e-2, conc0=50e-3, tvec=np.linspace(0, 200000, 100)):
    # Here we describe a square reaction network in which every pair of
    # reagents can combine: A+B->AB, A+C->AC, B+D->BD, C+D->CD, plus the
    # cross-connections B+C->BC and A+D->AD.
    A, B, C, Di, AB, AC, CD, BD, AD, BC, t = variables(
        'A, B, C, Di, AB, AC, CD, BD, AD, BC, t')
    tdata = [0, 1, 2, 100, 1000, 10000]
    kAB = Parameter('kAB', kABval)  # Rate constant for formation of AB
    kAC = Parameter('kAC', kACval)  # Rate constant for formation of AC
    kBD = Parameter('kBD', kBDval)  # Rate constant for formation of BD
    kCD = Parameter('kCD', kCDval)  # Rate constant for formation of CD
    kBC = Parameter('kBC', kBCval)  # Rate constant for formation of BC (cross-connection)
    kAD = Parameter('kAD', kADval)  # Rate constant for formation of AD (cross-connection)

    # Here's a rate expression for each component in the mixture. The
    # concentration of D is called 'Di' to avoid a clash with the
    # derivative operator D.
    model_dict = {
        D(AB, t): kAB * A * B,
        D(AC, t): kAC * A * C,
        D(BD, t): kBD * B * Di,
        D(CD, t): kCD * C * Di,
        D(BC, t): kBC * B * C,
        D(AD, t): kAD * A * Di,
        D(A, t): -(kAB * A * B + kAC * A * C + kAD * A * Di),
        D(B, t): -(kAB * A * B + kBD * B * Di + kBC * B * C),
        D(C, t): -(kAC * A * C + kCD * C * Di + kBC * B * C),
        D(Di, t): -(kBD * B * Di + kCD * C * Di + kAD * A * Di),
    }

    # Here we define the ODE model and specify the start concentrations.
    ode_model = ODEModel(model_dict,
                         initial={
                             t: 0.0, A: conc0, B: conc0, C: conc0, Di: conc0,
                             AB: 0, BC: 0, AC: 0, BD: 0, CD: 0, AD: 0
                         })

    # All observables are passed as None, so there is nothing to fit:
    # execute() simply returns the given rate constants, which we then use
    # to integrate the model.
    fit = Fit(ode_model, t=tdata, A=None, B=None, C=None, Di=None, AB=None,
              BC=None, AC=None, BD=None, CD=None, AD=None)
    fit_result = fit.execute()

    # Generate the trajectories
    ans = ode_model(t=tvec, **fit_result.params)._asdict()

    # and plot them
    plt.plot(tvec, ans[AB], label='[AB]')
    plt.plot(tvec, ans[AC], label='[AC]')
    plt.plot(tvec, ans[CD], label='[CD]')
    plt.plot(tvec, ans[BC], label='[BC]')
    plt.plot(tvec, ans[BD], label='[BD]')
    plt.plot(tvec, ans[AD], label='[AD]')
    plt.ylabel('Conc [M]')
    plt.xlabel('Time [s]')
    plt.legend()
    plt.show()

    res = np.asarray([
        ans[AB][-1], ans[AC][-1], ans[CD][-1],
        ans[BC][-1], ans[BD][-1], ans[AD][-1]
    ])
    resNorm = res / res.sum()
    plt.bar([1, 2, 3, 4, 5, 6], 100 * resNorm)
    plt.xticks([1, 2, 3, 4, 5, 6],
               ('[AB]', '[AC]', '[CD]', '[BC]', '[BD]', '[AD]'))
    plt.ylabel('% at equilibrium')
    plt.show()

    # Enhancement, in percent, compared to equal concentrations everywhere.
    resEnh = 100 * (resNorm - 1 / len(resNorm)) / (1 / len(resNorm))
    # Rounding errors can give a spurious difference: set small values to zero.
    resEnh[abs(resEnh) < 1e-5] = 0
    if np.sum(np.abs(resEnh)) > 0:
        yval = [1, 2, 3, 4, 5, 6]
        plt.bar(yval, resEnh)
        plt.xticks(yval, ('[AB]', '[AC]', '[CD]', '[BC]', '[BD]', '[AD]'))
        plt.ylabel('Enhancement [%]')
        plt.title('Enhancement / %')
        plt.show()
    else:
        print("No enhancement compared to equal rates")
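A quick usage sketch of the function above; the rate values are illustrative. Biasing one rate constant should shift the final product distribution toward that product, while the default equal rates should report no enhancement:

# Baseline: all six rates equal, expect an even split and no enhancement.
square()
# Speed up A+B formation tenfold; the bar charts should now favour [AB].
square(kABval=1e-1)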
def test_global_fitting():
    """
    Test a vector valued model with shared parameters between its
    components. `Fit` should pick the default minimizer (BFGS) whether or
    not parameters are shared.
    """
    x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
    y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')

    # The following vector valued function links all the equations together
    # as stated in the intro.
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
    })
    assert model.shared_parameters

    # Generate data from this model
    xdata1 = np.linspace(0, 10)
    xdata2 = xdata1[::2]  # Only every other point.

    ydata1, ydata2 = model(x_1=xdata1, x_2=xdata2, a_1=101.3, b_1=0.5,
                           a_2=56.3, b_2=1.1111, y0=10.8)
    # Add some noise to make it appear like real data
    np.random.seed(1)
    ydata1 += np.random.normal(0, 2, size=ydata1.shape)
    ydata2 += np.random.normal(0, 2, size=ydata2.shape)

    xdata = [xdata1, xdata2]
    ydata = [ydata1, ydata2]

    # Guesses
    a_1.value = 100
    a_2.value = 50
    b_1.value = 1
    b_2.value = 1
    y0.value = 10

    fit = Fit(model, x_1=xdata[0], x_2=xdata[1],
              y_1=ydata[0], y_2=ydata[1])
    assert isinstance(fit.minimizer, BFGS)

    # The next model does not share parameters, but is still a vector
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1,
        y_2: a_2 * x_2**2 + b_2 * x_2,
    })
    fit = Fit(model, x_1=xdata[0], x_2=xdata[1],
              y_1=ydata[0], y_2=ydata[1])
    assert not model.shared_parameters
    assert isinstance(fit.minimizer, BFGS)

    # Scalar model; still uses BFGS.
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1,
    })
    fit = Fit(model, x_1=xdata[0], y_1=ydata[0])
    assert model.shared_parameters is False
    assert isinstance(fit.minimizer, BFGS)
def f(x, a, b):
    # Linear helper model for the CallableNumericalModel below. The original
    # snippet referenced `f` without defining it; this definition is assumed,
    # matching the linear data generated in the test.
    return a * x + b


def test_pickle():
    """
    Test the picklability of the different minimizers.
    """
    # Create test data
    xdata = np.linspace(0, 100, 100)  # From 0 to 100 in 100 steps
    a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape)
    b_vec = np.random.normal(100, scale=2.0, size=xdata.shape)
    ydata = a_vec * xdata + b_vec  # Points scattered around a * x + b

    # Normal symbolic fit
    a = Parameter('a', value=0, min=0.0, max=1000)
    b = Parameter('b', value=0, min=0.0, max=1000)
    x, y = variables('x, y')

    # Make a set of all ScipyMinimizers, and add a chained minimizer.
    scipy_minimizers = list(subclasses(ScipyMinimize))
    chained_minimizer = (DifferentialEvolution, BFGS)
    scipy_minimizers.append(chained_minimizer)
    constrained_minimizers = subclasses(ScipyConstrainedMinimize)
    # Test for all of them if they can be pickled.
    for minimizer in scipy_minimizers:
        if minimizer in constrained_minimizers:
            constraints = [Ge(b, a)]
        else:
            constraints = []
        model = CallableNumericalModel({y: f}, independent_vars=[x],
                                       params=[a, b])
        fit = Fit(model, x=xdata, y=ydata, minimizer=minimizer,
                  constraints=constraints)
        if minimizer is not MINPACK:
            assert isinstance(fit.objective, LeastSquares)
            assert isinstance(fit.minimizer.objective, LeastSquares)
        else:
            assert isinstance(fit.objective, VectorLeastSquares)
            assert isinstance(fit.minimizer.objective, VectorLeastSquares)

        fit = fit.minimizer  # Just check if the minimizer pickles
        dump = pickle.dumps(fit)
        pickled_fit = pickle.loads(dump)
        problematic_attr = [
            'objective', '_pickle_kwargs', 'wrapped_objective',
            'constraints', 'wrapped_constraints', 'local_minimizer',
            'minimizers'
        ]

        for key, value in fit.__dict__.items():
            new_value = pickled_fit.__dict__[key]
            try:
                assert value == new_value
            except AssertionError as err:
                if key not in problematic_attr:
                    raise err
                # These attr are new instances, and therefore do not
                # pass an equality test. All we can do is see if they
                # are at least the same type.
                if isinstance(value, (list, tuple)):
                    for val1, val2 in zip(value, new_value):
                        assert isinstance(val1, val2.__class__)
                        if key == 'constraints':
                            assert val1.model.constraint_type == val2.model.constraint_type
                            assert list(val1.model.model_dict.values())[0] == list(val2.model.model_dict.values())[0]
                            assert val1.model.independent_vars == val2.model.independent_vars
                            assert val1.model.params == val2.model.params
                            assert val1.model.__signature__ == val2.model.__signature__
                        elif key == 'wrapped_constraints':
                            if isinstance(val1, dict):
                                assert val1['type'] == val2['type']
                                assert set(val1.keys()) == set(val2.keys())
                            elif isinstance(val1, NonlinearConstraint):
                                # For trust-ncg we manually check if their
                                # dicts are equal, because no __eq__ is
                                # implemented on NonlinearConstraint.
                                assert len(val1.__dict__) == len(val2.__dict__)
                                for attr in val1.__dict__:
                                    try:
                                        assert val1.__dict__[attr] == val2.__dict__[attr]
                                    except AssertionError:
                                        assert isinstance(val1.__dict__[attr],
                                                          val2.__dict__[attr].__class__)
                            else:
                                raise NotImplementedError(
                                    'No such constraint type is known.')
                elif key == '_pickle_kwargs':
                    FitResults._array_safe_dict_eq(value, new_value)
                else:
                    assert isinstance(new_value, value.__class__)
        assert set(fit.__dict__.keys()) == set(pickled_fit.__dict__.keys())

        # Test if we converge to the same result.
        np.random.seed(2)
        res_before = fit.execute()
        np.random.seed(2)
        res_after = pickled_fit.execute()
        assert FitResults._array_safe_dict_eq(res_before.__dict__,
                                              res_after.__dict__)
def twoProductModel(kABval=1e-2, kBCval=1e-2, conc0=50e-3,
                    tvec=np.linspace(0, 200000, 100)):
    # conc0 is the initial concentration of each reagent
    tdata = [0, 1, 2]
    # Here we describe a model with A+B->AB and B+C->BC
    A, B, C, AB, BC, t = variables('A, B, C, AB, BC, t')
    kAB = Parameter('kAB', kABval)  # Rate constant for formation of AB
    kBC = Parameter('kBC', kBCval)  # Rate constant for formation of BC

    # Here's a rate expression for each component in the mixture.
    model_dict = {
        D(AB, t): kAB * A * B,
        D(BC, t): kBC * B * C,
        D(A, t): -kAB * A * B,
        D(B, t): -(kAB * A * B + kBC * B * C),
        D(C, t): -kBC * B * C,
    }

    # Here we define the ODE model and specify the start concentrations.
    ode_model = ODEModel(model_dict,
                         initial={t: 0.0, A: conc0, B: conc0, C: conc0,
                                  AB: 0, BC: 0})

    # All observables are passed as None, so there is nothing to fit:
    # execute() simply returns the given rate constants, which we then use
    # to integrate the model.
    fit = Fit(ode_model, t=tdata, A=None, B=None, AB=None, BC=None, C=None)
    fit_result = fit.execute()

    # Generate the trajectories
    ans = ode_model(t=tvec, **fit_result.params)._asdict()

    # and plot them
    plt.plot(tvec, ans[AB], label='[AB]')
    plt.plot(tvec, ans[BC], label='[BC]')
    plt.ylabel('Conc [M]')
    plt.xlabel('Time [s]')
    plt.legend()
    plt.show()

    res = np.asarray([ans[AB][-1], ans[BC][-1]])
    resNorm = res / res.sum()
    plt.bar([1, 2], 100 * resNorm)
    plt.xticks([1, 2], ('[AB]', '[BC]'))
    plt.ylabel('% at equilibrium')
    plt.show()

    # Enhancement, in percent, compared to equal concentrations everywhere.
    resEnh = 100 * (resNorm - 1 / len(resNorm)) / (1 / len(resNorm))
    # Rounding errors can give a spurious difference: set small values to zero.
    resEnh[abs(resEnh) < 1e-5] = 0
    if np.sum(np.abs(resEnh)) > 0:
        plt.bar([1, 2], resEnh)
        plt.xticks([1, 2], ('[AB]', '[BC]'))
        plt.ylabel('Enhancement [%]')
        plt.title('Enhancement / %')
        plt.show()
    else:
        print("No enhancement compared to equal rates")
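These networks only move material between species, so totals should be conserved. A small sanity check one could drop into twoProductModel() right after `ans` is computed (a sketch, assuming the local names `ans`, `conc0`, `B`, `AB` and `BC` from above):

# B is consumed only by forming AB and BC, so [B] + [AB] + [BC]
# should equal the starting concentration at every time point.
b_total = ans[B] + ans[AB] + ans[BC]
assert np.allclose(b_total, conc0, atol=1e-6), "B mass balance violated"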
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: MIT
from symfit import variables, Parameter, Fit, D, ODEModel
import numpy as np
from symfit.contrib.interactive_guess import InteractiveGuess

# First order reaction kinetics. Data taken from
# http://chem.libretexts.org/Core/Physical_Chemistry/Kinetics/Rate_Laws/The_Rate_Law
tdata = np.array([0, 0.9184, 9.0875, 11.2485, 17.5255, 23.9993, 27.7949,
                  31.9783, 35.2118, 42.973, 46.6555, 50.3922, 55.4747,
                  61.827, 65.6603, 70.0939])
concentration = np.array([0.906, 0.8739, 0.5622, 0.5156, 0.3718, 0.2702,
                          0.2238, 0.1761, 0.1495, 0.1029, 0.086, 0.0697,
                          0.0546, 0.0393, 0.0324, 0.026])

# Define our ODE model
A, t = variables('A, t')
k = Parameter('k')

model = ODEModel({D(A, t): - k * A},
                 initial={t: tdata[0], A: concentration[0]})

guess = InteractiveGuess(model, A=concentration, t=tdata, n_points=250)
guess.execute()
print(guess)

fit = Fit(model, A=concentration, t=tdata)
fit_result = fit.execute()
print(fit_result)
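To visualise the result, one could evaluate the fitted model on a dense time grid and plot it over the data. A minimal sketch, assuming matplotlib is available in addition to the imports above:

import matplotlib.pyplot as plt

# Evaluate the fitted model on a dense grid for a smooth curve.
tsmooth = np.linspace(tdata.min(), tdata.max(), 500)
A_fit, = model(t=tsmooth, **fit_result.params)

plt.scatter(tdata, concentration, label='data')
plt.plot(tsmooth, A_fit, label='fit')
plt.xlabel('t')
plt.ylabel('[A]')
plt.legend()
plt.show()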
def test_weights(self):
    """
    Compare NumericalLeastSquares with LinearLeastSquares to see if errors
    are implemented consistently.
    """
    t_data = np.array([1.4, 2.1, 2.6, 3.0, 3.3])
    y_data = np.array([10, 20, 30, 40, 50])

    sigma = 0.2
    n = np.array([5, 3, 8, 15, 30])
    sigma_t = sigma / np.sqrt(n)

    # We now define our model
    t, y = variables('t, y')
    b = Parameter()
    # sqrt_g_inv = sqrt(1/g). Currently needed to linearize the model.
    sqrt_g_inv = Parameter()
    # t_model = (2 * y / g)**0.5
    t_model = {t: 2 * y**0.5 * sqrt_g_inv + b}

    # Different sigma for every point
    fit = Fit(t_model, y=y_data, t=t_data, sigma_t=sigma_t,
              absolute_sigma=False, minimizer=MINPACK)
    num_result_rel = fit.execute()

    fit = Fit(t_model, y=y_data, t=t_data, sigma_t=sigma_t,
              absolute_sigma=True, minimizer=MINPACK)
    num_result = fit.execute()

    # The relative covariance matrix should be the absolute one rescaled
    # by the reduced chi squared.
    for cov1, cov2 in zip(num_result_rel.covariance_matrix.flatten(),
                          num_result.covariance_matrix.flatten()):
        ss_res = np.sum(num_result_rel.infodict['fvec']**2)
        degrees_of_freedom = (len(fit.data[fit.model.dependent_vars[0]])
                              - len(fit.model.params))
        s_sq = ss_res / degrees_of_freedom
        self.assertAlmostEqual(cov1, cov2 * s_sq)

    fit = LinearLeastSquares(t_model, y=y_data, t=t_data, sigma_t=sigma_t)
    fit_result = fit.execute()
    self.assertAlmostEqual(num_result.value(sqrt_g_inv),
                           fit_result.value(sqrt_g_inv))
    self.assertAlmostEqual(num_result.value(b) / fit_result.value(b), 1.0, 5)

    for cov1, cov2 in zip(num_result.covariance_matrix.flatten(),
                          fit_result.covariance_matrix.flatten()):
        self.assertAlmostEqual(cov1 / cov2, 1.0, 5)
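In formula form, the loop above verifies that with `absolute_sigma=False` the covariance matrix is the absolute one rescaled by the reduced chi-square:

\Sigma_{\text{rel}} = s^2\,\Sigma_{\text{abs}},
\qquad
s^2 = \frac{\sum_i r_i^2}{N - p},

where the r_i are the weighted residuals (`infodict['fvec']`), N the number of data points and p the number of parameters.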
from symfit import parameters, variables, sin, cos, Fit
import numpy as np
import matplotlib.pyplot as plt


def fourier_series(x, f, n=0):
    """
    Returns a symbolic fourier series of order `n`.

    :param n: Order of the fourier series
    :param x: Independent variable
    :param f: Frequency of the fourier series
    """
    # Make the parameter objects for all the terms
    a0, *cos_a = parameters(','.join(['a{}'.format(i) for i in range(0, n + 1)]))
    sin_b = parameters(','.join(['b{}'.format(i) for i in range(1, n + 1)]))
    # Construct the series
    series = a0 + sum(ai * cos(i * f * x) + bi * sin(i * f * x)
                      for i, (ai, bi) in enumerate(zip(cos_a, sin_b), start=1))
    return series

x, y = variables('x, y')
w, = parameters('w')
model_dict = {y: fourier_series(x, f=w, n=3)}
print(model_dict)

# Make step function data
xdata = np.linspace(-np.pi, np.pi)
ydata = np.zeros_like(xdata)
ydata[xdata > 0] = 1
# Define a Fit object for this model and data
fit = Fit(model_dict, x=xdata, y=ydata)
fit_result = fit.execute()
print(fit_result)

# Plot the result
plt.plot(xdata, ydata)
plt.plot(xdata, fit.model(x=xdata, **fit_result.params).y, ls=':')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
def test_backwards_compatibility(self):
    """
    The LinearLeastSquares should give results compatible with those of
    NumericalLeastSquares and curve_fit. To do this we test the simple
    analytical model that was also used to calibrate the definition of
    absolute_sigma.
    """
    N = 1000
    sigma = 31.4 * np.ones(N)
    xn = np.arange(N, dtype=float)
    np.random.seed(10)
    yn = np.random.normal(size=N, scale=sigma)

    a = Parameter('a')
    y = Variable('y')
    model = {y: a}

    fit = LinearLeastSquares(model, y=yn, sigma_y=sigma,
                             absolute_sigma=False)
    fit_result = fit.execute()

    fit = Fit(model, y=yn, sigma_y=sigma, absolute_sigma=False,
              minimizer=MINPACK)
    num_result = fit.execute()

    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn,
                           sigma=sigma, absolute_sigma=False)
    self.assertAlmostEqual(fit_result.value(a), num_result.value(a), 5)
    self.assertAlmostEqual(fit_result.stdev(a), num_result.stdev(a), 5)
    self.assertAlmostEqual(fit_result.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result.stdev(a), pcov[0, 0]**0.5, 5)

    fit = LinearLeastSquares(model, y=yn, sigma_y=sigma,
                             absolute_sigma=True)
    fit_result = fit.execute()

    fit = Fit(model, y=yn, sigma_y=sigma, absolute_sigma=True,
              minimizer=MINPACK)
    num_result = fit.execute()

    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn,
                           sigma=sigma, absolute_sigma=True)
    self.assertAlmostEqual(fit_result.value(a), num_result.value(a), 5)
    self.assertAlmostEqual(fit_result.stdev(a), num_result.stdev(a), 5)
    self.assertAlmostEqual(fit_result.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result.stdev(a), pcov[0, 0]**0.5, 5)