def test_simple_kinetics(self):
    """ Simple kinetics data to test fitting """
    tdata = np.array([10, 26, 44, 70, 120])
    adata = 10e-4 * np.array([44, 34, 27, 20, 14])
    a, b, t = variables('a, b, t')
    k, a0 = parameters('k, a0')
    k.value = 0.01
    # a0.value, a0.min, a0.max = 54 * 10e-4, 40e-4, 60e-4
    a0 = 54 * 10e-4

    model_dict = {
        D(a, t): - k * a**2,
        D(b, t): k * a**2,
    }

    ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, b: 0.0})

    # Analytical solution
    model = GradientModel({a: 1 / (k * t + 1 / a0)})
    fit = Fit(model, t=tdata, a=adata)
    fit_result = fit.execute()

    fit = Fit(ode_model, t=tdata, a=adata, b=None, minimizer=MINPACK)
    ode_result = fit.execute()
    self.assertAlmostEqual(ode_result.value(k) / fit_result.value(k), 1.0, 4)
    self.assertAlmostEqual(ode_result.stdev(k) / fit_result.stdev(k), 1.0, 4)
    self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4)

    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    ode_result = fit.execute()
    self.assertAlmostEqual(ode_result.value(k) / fit_result.value(k), 1.0, 4)
    self.assertAlmostEqual(ode_result.stdev(k) / fit_result.stdev(k), 1.0, 4)
    self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4)
def test_simple_kinetics(self):
    """ Simple kinetics data to test fitting """
    tdata = np.array([10, 26, 44, 70, 120])
    adata = 10e-4 * np.array([44, 34, 27, 20, 14])
    a, b, t = variables('a, b, t')
    k, a0 = parameters('k, a0')
    k.value = 0.01
    # a0.value, a0.min, a0.max = 54 * 10e-4, 40e-4, 60e-4
    a0 = 54 * 10e-4

    model_dict = {
        D(a, t): - k * a**2,
        D(b, t): k * a**2,
    }

    ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, b: 0.0})

    # Analytical solution
    model = Model({a: 1 / (k * t + 1 / a0)})
    fit = Fit(model, t=tdata, a=adata)
    fit_result = fit.execute()

    fit = Fit(ode_model, t=tdata, a=adata, b=None, minimizer=MINPACK)
    ode_result = fit.execute()
    self.assertAlmostEqual(ode_result.value(k) / fit_result.value(k), 1.0, 4)
    self.assertAlmostEqual(ode_result.stdev(k) / fit_result.stdev(k), 1.0, 4)
    self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4)

    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    ode_result = fit.execute()
    self.assertAlmostEqual(ode_result.value(k) / fit_result.value(k), 1.0, 4)
    self.assertAlmostEqual(ode_result.stdev(k) / fit_result.stdev(k), 1.0, 4)
    self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4)
def test_diff_evo(self):
    """
    Tests fitting to a scalar gaussian with 2 independent variables with
    wide bounds.
    """
    fit = Fit(self.model, self.xx, self.yy, self.ydata, minimizer=BFGS)
    fit_result = fit.execute()
    self.assertIsInstance(fit.minimizer, BFGS)

    # Make sure a local optimizer doesn't find the answer.
    self.assertNotAlmostEqual(fit_result.value(self.x0_1), 0.4, 1)
    self.assertNotAlmostEqual(fit_result.value(self.y0_1), 0.4, 1)

    # On to the main event
    fit = Fit(self.model, self.xx, self.yy, self.ydata,
              minimizer=DifferentialEvolution)
    fit_result = fit.execute(polish=True, seed=0, tol=1e-4, maxiter=50)
    # Global minimizers are really bad at finding local minima though, so
    # roughly equal is good enough.
    self.assertAlmostEqual(fit_result.value(self.x0_1), 0.4, 1)
    self.assertAlmostEqual(fit_result.value(self.y0_1), 0.4, 1)
def test_full_eval_range(self):
    """
    Test if ODEModels can be evaluated at t < t_initial.

    A bit of a no news is good news test.
    """
    tdata = np.array([0, 10, 26, 44, 70, 120])
    adata = 10e-4 * np.array([54, 44, 34, 27, 20, 14])
    a, b, t = variables('a, b, t')
    k, a0 = parameters('k, a0')
    k.value = 0.01
    t0 = tdata[2]
    a0 = adata[2]
    b0 = 0.02729855  # Obtained from evaluating from t=0.

    model_dict = {
        D(a, t): - k * a**2,
        D(b, t): k * a**2,
    }

    ode_model = ODEModel(model_dict, initial={t: t0, a: a0, b: b0})

    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    ode_result = fit.execute()
    self.assertGreater(ode_result.r_squared, 0.95)

    # Now start from a timepoint that is not in the t-array such that it
    # triggers another pathway to be taken in integrating it.
    # Again, no news is good news.
    ode_model = ODEModel(model_dict, initial={t: t0 + 1e-5, a: a0, b: b0})

    fit = Fit(ode_model, t=tdata, a=adata, b=None)
    ode_result = fit.execute()
    self.assertGreater(ode_result.r_squared, 0.95)
def test_chained_min_signature(self):
    """
    Test the automatic generation of the signature for ChainedMinimizer
    """
    minimizers = [
        BFGS, DifferentialEvolution, BFGS, DifferentialEvolution, BFGS
    ]

    fit = Fit(self.model, self.xx, self.yy, self.ydata,
              minimizer=minimizers)

    names = [
        'BFGS', 'DifferentialEvolution', 'BFGS_2',
        'DifferentialEvolution_2', 'BFGS_3'
    ]
    for name, param_name in zip(names, fit.minimizer.__signature__.parameters):
        self.assertEqual(name, param_name)
    # Check for equal lengths because zip is slippery that way
    self.assertEqual(len(names), len(fit.minimizer.__signature__.parameters))

    for param in fit.minimizer.__signature__.parameters.values():
        self.assertEqual(param.kind, inspect_sig.Parameter.KEYWORD_ONLY)

    # Make sure keywords end up at the right minimizer.
    with self.assertRaises(TypeError):
        # This is not a valid kwarg to DiffEvo, but it is to BFGS. Check if
        # we really go by name of the Minimizer, not by order.
        fit.execute(DifferentialEvolution={'return_all': False})
def test_rdistmodel_fit(self):
    psf = PSF(sigma=1.59146972e+00)
    rm = RDistModel(psf, mem=self.memory, r='equal')
    x, y = self.cells[0].r_dist(20, 1)
    y -= y.min()

    fit = Fit(rm, x, y, minimizer=Powell, sigma_y=1 / np.sqrt(y))
    res = fit.execute()
    par_dict = {
        'a1': 75984.78344557587,
        'a2': 170938.0835695505,
        'r': 7.186390052694122
    }
    for k, v in par_dict.items():
        self.assertAlmostEqual(v, res.params[k], 2)
    self.assertAlmostEqual(21834555979.09033, res.objective_value, 3)

    fit = Fit(rm, x, y, minimizer=Powell)
    res = fit.execute()
    par_dict = {
        'a1': 86129.37542153012,
        'a2': 163073.91919617794,
        'r': 7.372535479080642
    }
    for k, v in par_dict.items():
        self.assertAlmostEqual(v, res.params[k], 2)
    self.assertAlmostEqual(7129232.534842306, res.objective_value, 3)
def test_known_solution(self):
    p, c1 = parameters('p, c1')
    y, t = variables('y, t')
    p.value = 3.0

    model_dict = {
        D(y, t): - p * y,
    }

    # Let's say we know the exact solution to this problem
    sol = Model({y: exp(- p * t)})

    # Generate some data
    tdata = np.linspace(0, 3, 10001)
    ydata = sol(t=tdata, p=3.22)[0]
    ydata += np.random.normal(0, 0.005, ydata.shape)

    ode_model = ODEModel(model_dict, initial={t: 0.0, y: ydata[0]})
    fit = Fit(ode_model, t=tdata, y=ydata)
    ode_result = fit.execute()

    c1.value = ydata[0]
    fit = Fit(sol, t=tdata, y=ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(ode_result.value(p) / fit_result.value(p), 1, 2)
    self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4)
    self.assertAlmostEqual(ode_result.stdev(p) / fit_result.stdev(p), 1, 3)
def test_vector_none_fitting():
    """
    Fit to a 3 component vector valued function with one variable's data
    set to None, without bounds or guesses.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit_none = Fit(model=model, a_i=xdata[0], b_i=xdata[1], c_i=None,
                   minimizer=MINPACK)
    fit = Fit(model=model, a_i=xdata[0], b_i=xdata[1], c_i=xdata[2],
              minimizer=MINPACK)
    fit_none_result = fit_none.execute()
    fit_result = fit.execute()

    assert fit_none_result.value(b) == pytest.approx(fit_result.value(b), 1e-4)
    assert fit_none_result.value(a) == pytest.approx(fit_result.value(a), 1e-4)
    # the parameter without data should be unchanged.
    assert fit_none_result.value(c) == pytest.approx(1.0)
def test_fixed_parameters_2():
    """
    Make sure parameter boundaries are respected
    """
    x = Parameter('x', min=1)
    y = Variable('y')
    model = Model({y: x**2})

    bounded_minimizers = list(subclasses(BoundedMinimizer))
    for minimizer in bounded_minimizers:
        if minimizer is MINPACK:
            # Not a MINPACKable problem because it only has a param
            continue
        fit = Fit(model, minimizer=minimizer)
        assert isinstance(fit.objective, MinimizeModel)
        if minimizer is DifferentialEvolution:
            # Also needs a max
            x.max = 10
            fit_result = fit.execute()
            x.max = None
        else:
            fit_result = fit.execute()
        assert fit_result.value(x) >= 1.0
        assert fit_result.value(x) <= 2.0
        assert fit.minimizer.bounds == [(1, None)]
def test_chained_min_signature(self):
    """
    Test the automatic generation of the signature for ChainedMinimizer
    """
    minimizers = [
        BFGS, DifferentialEvolution, BFGS, DifferentialEvolution, BFGS
    ]

    fit = Fit(self.model, self.xx, self.yy, self.ydata,
              minimizer=minimizers)

    names = [
        'BFGS', 'DifferentialEvolution', 'BFGS_2',
        'DifferentialEvolution_2', 'BFGS_3'
    ]
    for name, param_name in zip(names, fit.minimizer.__signature__.parameters):
        assert name == param_name
    # Check for equal lengths because zip is slippery that way
    assert len(names) == len(fit.minimizer.__signature__.parameters)

    for param in fit.minimizer.__signature__.parameters.values():
        assert param.kind == inspect_sig.Parameter.KEYWORD_ONLY

    # Make sure keywords end up at the right minimizer.
    with pytest.raises(TypeError):
        # This is not a valid kwarg to DiffEvo, but it is to BFGS. Check if
        # we really go by name of the Minimizer, not by order.
        fit.execute(DifferentialEvolution={'return_all': False})
def test_gaussian_2d_fitting():
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables. Very sensitive to initial guesses, and if they are chosen too
    restrictively, Fit actually throws a tantrum. It therefore appears to be
    more sensitive than NumericalLeastSquares.
    """
    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
    cov = [[0.2**2, 0], [0, 0.1**2]]

    np.random.seed(0)
    data = np.random.multivariate_normal(mean, cov, 100000)

    # Insert them as y,x here as np f***s up cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter(value=mean[0], min=0.0, max=1.0)
    sig_x = Parameter(value=0.2, min=0.0, max=0.3)
    y0 = Parameter(value=mean[1], min=0.0, max=1.0)
    sig_y = Parameter(value=0.1, min=0.0, max=0.3)
    A = Parameter(value=np.mean(ydata), min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')

    model = GradientModel(
        {g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    assert fit_result.value(x0) == pytest.approx(np.mean(data[:, 0]), 1e-3)
    assert fit_result.value(y0) == pytest.approx(np.mean(data[:, 1]), 1e-3)
    assert np.abs(fit_result.value(sig_x)) == pytest.approx(
        np.std(data[:, 0]), 1e-2)
    assert np.abs(fit_result.value(sig_y)) == pytest.approx(
        np.std(data[:, 1]), 1e-2)
    assert fit_result.r_squared >= 0.96

    # Compare with industry standard MINPACK
    fit_std = Fit(model, x=xx, y=yy, g=ydata, minimizer=MINPACK)
    fit_std_result = fit_std.execute()

    assert fit_std_result.value(x0) == pytest.approx(fit_result.value(x0), 1e-4)
    assert fit_std_result.value(y0) == pytest.approx(fit_result.value(y0), 1e-4)
    assert fit_std_result.value(sig_x) == pytest.approx(
        fit_result.value(sig_x), 1e-4)
    assert fit_std_result.value(sig_y) == pytest.approx(
        fit_result.value(sig_y), 1e-4)
    assert fit_std_result.r_squared == pytest.approx(fit_result.r_squared, 1e-4)
def harmonic_approximation(polygon: Polygon, n=3):
    from symfit import Eq, Fit, cos, parameters, pi, sin, variables

    def fourier_series(x, f, n=0):
        """
        Returns a symbolic fourier series of order `n`.

        :param n: Order of the fourier series.
        :param x: Independent variable
        :param f: Frequency of the fourier series
        """
        # Make the parameter objects for all the terms
        a0, *cos_a = parameters(','.join(['a{}'.format(i) for i in range(0, n + 1)]))
        sin_b = parameters(','.join(['b{}'.format(i) for i in range(1, n + 1)]))
        # Construct the series
        series = a0 + sum(ai * cos(i * f * x) + bi * sin(i * f * x)
                          for i, (ai, bi) in enumerate(zip(cos_a, sin_b), start=1))
        return series

    x, y = variables('x, y')
    w, = parameters('w')
    fourier = fourier_series(x, f=w, n=n)
    model_dict = {y: fourier}
    print(model_dict)

    # Extract data from argument
    # FIXME: how to make a clockwise strictly increasing curve?
    xdata, ydata = polygon.exterior.xy
    t = np.linspace(0, 2 * np.pi, num=len(xdata))

    constr = [
        # Ge(x, 0), Le(x, 2 * pi),
        Eq(fourier.subs({x: 0}), fourier.subs({x: 2 * pi})),
        Eq(fourier.diff(x).subs({x: 0}), fourier.diff(x).subs({x: 2 * pi})),
        # Eq(fourier.diff(x, 2).subs({x: 0}), fourier.diff(x, 2).subs({x: 2 * pi})),
    ]
    print(constr)

    fit_x = Fit(model_dict, x=t, y=xdata, constraints=constr)
    fit_y = Fit(model_dict, x=t, y=ydata, constraints=constr)
    fitx_result = fit_x.execute()
    fity_result = fit_y.execute()
    print(fitx_result)
    print(fity_result)

    # Define function that generates the curve
    def curve_lambda(_t):
        return np.array(
            [
                fit_x.model(x=_t, **fitx_result.params).y,
                fit_y.model(x=_t, **fity_result.params).y
            ]
        ).ravel()

    # code to test if fit is correct
    plot_fit(polygon, curve_lambda, t, title='Harmonic Approximation')

    return curve_lambda
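# Illustrative usage sketch for harmonic_approximation (added for clarity, not
# part of the original source). It assumes `Polygon` is shapely.geometry.Polygon,
# as the type hint above suggests, and that plot_fit is importable in this module.
# The returned callable maps a parameter array in [0, 2*pi] to the ravelled x- and
# y-coordinates of the fitted closed curve.
def _example_harmonic_approximation():
    from shapely.geometry import Polygon  # assumed dependency

    square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    curve = harmonic_approximation(square, n=5)
    t_eval = np.linspace(0, 2 * np.pi, 200)
    xy = curve(t_eval)
    # curve() ravels the two coordinate series: first half x, second half y.
    x_fit, y_fit = xy[:t_eval.size], xy[t_eval.size:]
    return x_fit, y_fit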
def test_minimize(self):
    """
    Tests maximizing a function with and without constraints, taken from the
    scipy `minimize` tutorial. Compare the symfit result with the scipy
    result.
    https://docs.scipy.org/doc/scipy-0.18.1/reference/tutorial/optimize.html#constrained-minimization-of-multivariate-scalar-functions-minimize
    """
    x = Parameter(value=-1.0)
    y = Parameter(value=1.0)
    # Use an unnamed Variable on purpose to test the auto-generation of names.
    model = Model(2 * x * y + 2 * x - x ** 2 - 2 * y ** 2)
    constraints = [
        Ge(y - 1, 0),  # y - 1 >= 0,
        Eq(x**3 - y, 0),  # x**3 - y == 0,
    ]

    def func(x, sign=1.0):
        """ Objective function """
        return sign*(2*x[0]*x[1] + 2*x[0] - x[0]**2 - 2*x[1]**2)

    def func_deriv(x, sign=1.0):
        """ Derivative of objective function """
        dfdx0 = sign*(-2*x[0] + 2*x[1] + 2)
        dfdx1 = sign*(2*x[0] - 4*x[1])
        return np.array([dfdx0, dfdx1])

    cons = (
        {'type': 'eq',
         'fun': lambda x: np.array([x[0]**3 - x[1]]),
         'jac': lambda x: np.array([3.0*(x[0]**2.0), -1.0])},
        {'type': 'ineq',
         'fun': lambda x: np.array([x[1] - 1]),
         'jac': lambda x: np.array([0.0, 1.0])}
    )

    # Unconstrained fit
    res = minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv,
                   method='BFGS', options={'disp': False})
    fit = Fit(model=- model)
    self.assertIsInstance(fit.objective, MinimizeModel)
    self.assertIsInstance(fit.minimizer, BFGS)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(x) / res.x[0], 1.0, 6)
    self.assertAlmostEqual(fit_result.value(y) / res.x[1], 1.0, 6)

    # Same test, but with constraints in place.
    res = minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv,
                   constraints=cons, method='SLSQP', options={'disp': False})

    from symfit.core.minimizers import SLSQP
    fit = Fit(- model, constraints=constraints)
    self.assertEqual(fit.constraints[0].constraint_type, Ge)
    self.assertEqual(fit.constraints[1].constraint_type, Eq)
    fit_result = fit.execute()
    self.assertAlmostEqual(fit_result.value(x), res.x[0], 6)
    self.assertAlmostEqual(fit_result.value(y), res.x[1], 6)
def test_minimize():
    """
    Tests maximizing a function with and without constraints, taken from the
    scipy `minimize` tutorial. Compare the symfit result with the scipy
    result.
    https://docs.scipy.org/doc/scipy-0.18.1/reference/tutorial/optimize.html#constrained-minimization-of-multivariate-scalar-functions-minimize
    """
    x = Parameter(value=-1.0)
    y = Parameter(value=1.0)
    # Use an unnamed Variable on purpose to test the auto-generation of names.
    model = Model(2 * x * y + 2 * x - x ** 2 - 2 * y ** 2)
    constraints = [
        Ge(y - 1, 0),  # y - 1 >= 0,
        Eq(x**3 - y, 0),  # x**3 - y == 0,
    ]

    def func(x, sign=1.0):
        """ Objective function """
        return sign*(2*x[0]*x[1] + 2*x[0] - x[0]**2 - 2*x[1]**2)

    def func_deriv(x, sign=1.0):
        """ Derivative of objective function """
        dfdx0 = sign*(-2*x[0] + 2*x[1] + 2)
        dfdx1 = sign*(2*x[0] - 4*x[1])
        return np.array([dfdx0, dfdx1])

    cons = (
        {'type': 'eq',
         'fun': lambda x: np.array([x[0]**3 - x[1]]),
         'jac': lambda x: np.array([3.0*(x[0]**2.0), -1.0])},
        {'type': 'ineq',
         'fun': lambda x: np.array([x[1] - 1]),
         'jac': lambda x: np.array([0.0, 1.0])}
    )

    # Unconstrained fit
    res = minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv,
                   method='BFGS', options={'disp': False})
    fit = Fit(model=-model)
    assert isinstance(fit.objective, MinimizeModel)
    assert isinstance(fit.minimizer, BFGS)
    fit_result = fit.execute()

    assert fit_result.value(x) == pytest.approx(res.x[0], 1e-6)
    assert fit_result.value(y) == pytest.approx(res.x[1], 1e-6)

    # Same test, but with constraints in place.
    res = minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv,
                   constraints=cons, method='SLSQP', options={'disp': False})

    fit = Fit(-model, constraints=constraints)
    assert fit.constraints[0].constraint_type == Ge
    assert fit.constraints[1].constraint_type == Eq
    fit_result = fit.execute()
    assert fit_result.value(x) == pytest.approx(res.x[0], 1e-6)
    assert fit_result.value(y) == pytest.approx(res.x[1], 1e-6)
def test_LeastSquares():
    """
    Tests if the LeastSquares objective gives the right shapes of output by
    comparing with its analytical equivalent.
    """
    i = Idx('i', 100)
    x, y = symbols('x, y', cls=Variable)
    X2 = symbols('X2', cls=Variable)
    a, b = parameters('a, b')

    model = Model({y: a * x**2 + b * x})
    xdata = np.linspace(0, 10, 100)
    ydata = model(x=xdata, a=5, b=2).y + np.random.normal(0, 5, xdata.shape)

    # Construct a LeastSquares objective and its analytical equivalent
    chi2_numerical = LeastSquares(model, data={
        x: xdata, y: ydata, model.sigmas[y]: np.ones_like(xdata)
    })
    chi2_exact = Model({X2: FlattenSum(0.5 * ((a * x**2 + b * x) - y)**2, i)})

    eval_exact = chi2_exact(x=xdata, y=ydata, a=2, b=3)
    jac_exact = chi2_exact.eval_jacobian(x=xdata, y=ydata, a=2, b=3)
    hess_exact = chi2_exact.eval_hessian(x=xdata, y=ydata, a=2, b=3)
    eval_numerical = chi2_numerical(x=xdata, a=2, b=3)
    jac_numerical = chi2_numerical.eval_jacobian(x=xdata, a=2, b=3)
    hess_numerical = chi2_numerical.eval_hessian(x=xdata, a=2, b=3)

    # Test model jacobian and hessian shape
    assert model(x=xdata, a=2, b=3)[0].shape == ydata.shape
    assert model.eval_jacobian(x=xdata, a=2, b=3)[0].shape == (2, 100)
    assert model.eval_hessian(x=xdata, a=2, b=3)[0].shape == (2, 2, 100)
    # Test exact chi2 shape
    assert eval_exact[0].shape == (1,)
    assert jac_exact[0].shape == (2, 1)
    assert hess_exact[0].shape == (2, 2, 1)

    # Test if these two models have the same call, jacobian, and hessian
    assert eval_exact[0] == pytest.approx(eval_numerical)
    assert isinstance(eval_numerical, float)
    assert isinstance(eval_exact[0][0], float)
    assert np.squeeze(jac_exact[0], axis=-1) == pytest.approx(jac_numerical)
    assert isinstance(jac_numerical, np.ndarray)
    assert np.squeeze(hess_exact[0], axis=-1) == pytest.approx(hess_numerical)
    assert isinstance(hess_numerical, np.ndarray)

    fit = Fit(chi2_exact, x=xdata, y=ydata, objective=MinimizeModel)
    fit_exact_result = fit.execute()
    fit = Fit(model, x=xdata, y=ydata, absolute_sigma=True)
    fit_num_result = fit.execute()
    assert fit_exact_result.value(a) == fit_num_result.value(a)
    assert fit_exact_result.value(b) == fit_num_result.value(b)
    assert fit_exact_result.stdev(a) == pytest.approx(fit_num_result.stdev(a))
    assert fit_exact_result.stdev(b) == pytest.approx(fit_num_result.stdev(b))
def test_error_analytical(self):
    """
    Test using a case where the analytical answer is known. Uses both
    symfit and scipy's curve_fit.
    Modeled after:
    http://nbviewer.ipython.org/urls/gist.github.com/taldcroft/5014170/raw/31e29e235407e4913dc0ec403af7ed524372b612/curve_fit.ipynb
    """
    N = 10000
    sigma = 10.0 * np.ones(N)
    xn = np.arange(N, dtype=float)
    # yn = np.zeros_like(xn)
    np.random.seed(10)
    yn = np.random.normal(size=len(xn), scale=sigma)

    a = Parameter()
    y = Variable()
    model = {y: a}

    fit = Fit(model, y=yn, sigma_y=sigma)
    fit_result = fit.execute()

    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn,
                           sigma=sigma, absolute_sigma=True)
    self.assertAlmostEqual(fit_result.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result.stdev(a), np.sqrt(np.diag(pcov))[0], 2)

    fit_no_sigma = Fit(model, yn)
    fit_result_no_sigma = fit_no_sigma.execute()

    popt, pcov = curve_fit(
        lambda x, a: a * np.ones_like(x),
        xn, yn,
    )
    # With or without sigma, the bestfit params should be in agreement in case of equal weights
    self.assertAlmostEqual(fit_result.value(a), fit_result_no_sigma.value(a), 5)
    # Since symfit is all about absolute errors, the sigma will not be in agreement
    self.assertNotEqual(fit_result.stdev(a), fit_result_no_sigma.stdev(a), 5)
    self.assertAlmostEqual(fit_result_no_sigma.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result_no_sigma.stdev(a), pcov[0][0]**0.5, 5)

    # Analytical answer for mean of N(0,1):
    mu = 0.0
    sigma_mu = sigma[0] / N**0.5

    self.assertAlmostEqual(fit_result.stdev(a), sigma_mu, 5)
def test_simple_sigma(self):
    """
    Make sure we produce the same results as scipy's curve_fit, with and
    without sigmas, and compare the results of both to a known value.
    """
    t_data = np.array([1.4, 2.1, 2.6, 3.0, 3.3])
    y_data = np.array([10, 20, 30, 40, 50])

    sigma = 0.2
    n = np.array([5, 3, 8, 15, 30])
    sigma_t = sigma / np.sqrt(n)

    # We now define our model
    y = Variable()
    g = Parameter()
    t_model = (2 * y / g)**0.5

    fit = Fit(t_model, y_data, t_data)  # , sigma=sigma_t)
    fit_result = fit.execute()

    # h_smooth = np.linspace(0,60,100)
    # t_smooth = t_model(y=h_smooth, **fit_result.params)

    # Compare with the results from curve_fit, no weights
    popt_noweights, pcov_noweights = curve_fit(lambda y, p: (2 * y / p)**0.5,
                                               y_data, t_data)
    self.assertAlmostEqual(fit_result.value(g), popt_noweights[0])
    self.assertAlmostEqual(fit_result.stdev(g), np.sqrt(pcov_noweights[0, 0]))

    # Same sigma everywhere
    fit = Fit(t_model, y_data, t_data, 0.0031, absolute_sigma=False)
    fit_result = fit.execute()
    popt_sameweights, pcov_sameweights = curve_fit(
        lambda y, p: (2 * y / p)**0.5,
        y_data, t_data,
        sigma=0.0031,
        absolute_sigma=False
    )
    self.assertAlmostEqual(fit_result.value(g), popt_sameweights[0], 4)
    self.assertAlmostEqual(fit_result.stdev(g), np.sqrt(pcov_sameweights[0, 0]), 4)
    # Same weight everywhere should be the same as no weight when absolute_sigma=False
    self.assertAlmostEqual(fit_result.value(g), popt_noweights[0], 4)
    self.assertAlmostEqual(fit_result.stdev(g), np.sqrt(pcov_noweights[0, 0]), 4)

    # Different sigma for every point
    fit = Fit(t_model, y_data, t_data, 0.1*sigma_t, absolute_sigma=False)
    fit_result = fit.execute()
    popt, pcov = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data,
                           sigma=.1*sigma_t)

    self.assertAlmostEqual(fit_result.value(g), popt[0])
    self.assertAlmostEqual(fit_result.stdev(g), np.sqrt(pcov[0, 0]))

    # according to Mathematica
    self.assertAlmostEqual(fit_result.value(g), 9.095, 3)
    self.assertAlmostEqual(fit_result.stdev(g), 0.102, 3)
def test_gaussian_2d_fitting(self):
    """
    Tests fitting to a scalar gaussian function with 2 independent
    variables. Very sensitive to initial guesses, and if they are chosen too
    restrictively, Fit actually throws a tantrum. It therefore appears to be
    more sensitive than NumericalLeastSquares.
    """
    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
    cov = [[0.2**2, 0], [0, 0.1**2]]

    np.random.seed(0)
    data = np.random.multivariate_normal(mean, cov, 100000)

    # Insert them as y,x here as np f***s up cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')

    x0 = Parameter(value=mean[0], min=0.0, max=1.0)
    sig_x = Parameter(value=0.2, min=0.0, max=0.3)
    y0 = Parameter(value=mean[1], min=0.0, max=1.0)
    sig_y = Parameter(value=0.1, min=0.0, max=0.3)
    A = Parameter(value=np.mean(ydata), min=0.0)
    x = Variable('x')
    y = Variable('y')
    g = Variable('g')

    model = Model({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
    fit = Fit(model, x=xx, y=yy, g=ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(x0), np.mean(data[:, 0]), 3)
    self.assertAlmostEqual(fit_result.value(y0), np.mean(data[:, 1]), 3)
    self.assertAlmostEqual(np.abs(fit_result.value(sig_x)), np.std(data[:, 0]), 2)
    self.assertAlmostEqual(np.abs(fit_result.value(sig_y)), np.std(data[:, 1]), 2)
    self.assertGreaterEqual(fit_result.r_squared, 0.96)

    # Compare with industry standard MINPACK
    fit_std = Fit(model, x=xx, y=yy, g=ydata, minimizer=MINPACK)
    fit_std_result = fit_std.execute()

    self.assertAlmostEqual(fit_std_result.value(x0), fit_result.value(x0), 4)
    self.assertAlmostEqual(fit_std_result.value(y0), fit_result.value(y0), 4)
    self.assertAlmostEqual(fit_std_result.value(sig_x), fit_result.value(sig_x), 4)
    self.assertAlmostEqual(fit_std_result.value(sig_y), fit_result.value(sig_y), 4)
    self.assertAlmostEqual(fit_std_result.r_squared, fit_result.r_squared, 4)
def test_covariances(self):
    """
    Compare the equal and unequal length handling of `HasCovarianceMatrix`.
    If it works properly, the unequal length method should reduce to the
    equal length one if called with equal length data.

    Computing unequal dataset length covariances remains something to be
    careful with, but this backwards compatibility provides some validation.
    """
    N = 10000
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')
    model = {a_i: a, b_i: b, c_i: c}

    np.random.seed(1)
    # Sample from a multivariate normal with correlation.
    pcov = 1e-1 * np.array([[0.4, 0.3, 0.5], [0.3, 0.8, 0.4], [0.5, 0.4, 1.2]])
    xdata = np.random.multivariate_normal([10, 100, 70], pcov, N).T

    fit = Fit(
        model=model,
        a_i=xdata[0], b_i=xdata[1], c_i=xdata[2],
        absolute_sigma=False
    )
    fit_result = fit.execute()

    cov_equal = fit._cov_mat_equal_lenghts(fit_result.params)
    cov_unequal = fit._cov_mat_unequal_lenghts(fit_result.params)
    np.testing.assert_array_almost_equal(cov_equal, cov_unequal)

    # Try with absolute_sigma=True
    fit = Fit(
        model=model,
        a_i=xdata[0], b_i=xdata[1], c_i=xdata[2],
        sigma_a_i=np.sqrt(pcov[0, 0]),
        sigma_b_i=np.sqrt(pcov[1, 1]),
        sigma_c_i=np.sqrt(pcov[2, 2]),
        absolute_sigma=True
    )
    fit_result = fit.execute()

    cov_equal = fit._cov_mat_equal_lenghts(fit_result.params)
    cov_unequal = fit._cov_mat_unequal_lenghts(fit_result.params)
    np.testing.assert_array_almost_equal(cov_equal, cov_unequal)
def test_vector_fitting_bounds(self):
    """
    Tests fitting to a 3 component vector valued function, with bounds.
    """
    a, b, c = parameters('a, b, c')
    a.min = 0
    a.max = 25
    b.min = 0
    b.max = 500
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit = Fit(
        model=model,
        a_i=xdata[0],
        b_i=xdata[1],
        c_i=xdata[2],
    )
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a), np.mean(xdata[0]), 4)
    self.assertAlmostEqual(fit_result.value(b), np.mean(xdata[1]), 4)
    self.assertAlmostEqual(fit_result.value(c), np.mean(xdata[2]), 4)
def test_fitting(self):
    xdata = np.linspace(1, 10, 10)
    ydata = 3 * xdata**2

    a = Parameter()  # 3.1, min=2.5, max=3.5
    b = Parameter()
    x = Variable()
    new = a * x**b

    fit = Fit(new, xdata, ydata, minimizer=MINPACK)
    fit_result = fit.execute()

    self.assertIsInstance(fit_result, FitResults)
    self.assertAlmostEqual(fit_result.value(a), 3.0)
    self.assertAlmostEqual(fit_result.value(b), 2.0)

    self.assertIsInstance(fit_result.stdev(a), float)
    self.assertIsInstance(fit_result.stdev(b), float)

    self.assertIsInstance(fit_result.r_squared, float)
    self.assertEqual(fit_result.r_squared, 1.0)  # by definition since there's no fuzziness

    # Test several illegal ways to access the data.
    self.assertRaises(AttributeError, getattr, *[fit_result.params, 'a_fdska'])
    self.assertRaises(AttributeError, getattr, *[fit_result.params, 'c'])
    self.assertRaises(AttributeError, getattr, *[fit_result.params, 'a_stdev_stdev'])
    self.assertRaises(AttributeError, getattr, *[fit_result.params, 'a_stdev_'])
    self.assertRaises(AttributeError, getattr, *[fit_result.params, 'a__stdev'])
def test_initial_parameters():
    """
    Identical to test_polgar, but with a0 as free Parameter.
    """
    a, b, c, d, t = variables('a, b, c, d, t')
    k, p, l, m = parameters('k, p, l, m')

    a0 = Parameter('a0', min=0, value=10, fixed=True)
    c0 = Parameter('c0', min=0, value=0.1)
    b = a0 - d + a
    model_dict = {
        D(d, t): l * c * b - m * d,
        D(c, t): k * a * b - p * c - l * c * b + m * d,
        D(a, t): - k * a * b + p * c,
    }

    ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, c: c0, d: 0.0})

    # Generate some data
    tdata = np.linspace(0, 3, 1000)
    # Eval
    AA, AAB, BAAB = ode_model(t=tdata, k=0.1, l=0.2, m=.3, p=0.3, a0=10, c0=0)
    fit = Fit(ode_model, t=tdata, a=AA, c=AAB, d=BAAB)
    results = fit.execute()
    print(results)
    assert results.value(a0) == pytest.approx(10, abs=1e-8)
    assert results.value(c0) == pytest.approx(0, abs=1e-8)

    assert ode_model.params == [a0, c0, k, l, m, p]
    assert ode_model.initial_params == [a0, c0]
    assert ode_model.model_params == [a0, k, l, m, p]
def test_vector_fitting(self):
    """
    Tests fitting to a 3 component vector valued function, without bounds
    or guesses.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit = Fit(
        model=model,
        a_i=xdata[0],
        b_i=xdata[1],
        c_i=xdata[2],
        minimizer=MINPACK
    )
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a) / 9.985691, 1.0, 5)
    self.assertAlmostEqual(fit_result.value(b) / 1.006143e+02, 1.0, 4)
    self.assertAlmostEqual(fit_result.value(c) / 7.085713e+01, 1.0, 5)
def test_gaussian_fitting(self):
    """
    Tests fitting to a gaussian function and fit_result.params unpacking.
    """
    xdata = 2 * np.random.rand(10000) - 1  # random between [-1, 1]
    ydata = 5.0 * scipy.stats.norm.pdf(xdata, loc=0.0, scale=1.0)

    x0 = Parameter()
    sig = Parameter()
    A = Parameter()
    x = Variable()
    g = A * Gaussian(x, x0, sig)

    fit = Fit(g, xdata, ydata)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(A), 5.0)
    self.assertAlmostEqual(np.abs(fit_result.value(sig)), 1.0)
    self.assertAlmostEqual(fit_result.value(x0), 0.0)
    # raise Exception([i for i in fit_result.params])
    sexy = g(x=2.0, **fit_result.params)
    ugly = g(
        x=2.0,
        x0=fit_result.value(x0),
        A=fit_result.value(A),
        sig=fit_result.value(sig),
    )
    self.assertEqual(sexy, ugly)
def test_fitting(self):
    """
    Tests fitting with NumericalLeastSquares. Makes sure that the resulting
    objects and values are of the right type, and that the fit_result does
    not have unexpected members.
    """
    xdata = np.linspace(1, 10, 10)
    ydata = 3 * xdata**2

    a = Parameter()  # 3.1, min=2.5, max=3.5
    b = Parameter()
    x = Variable()
    new = a * x**b

    fit = Fit(new, xdata, ydata, minimizer=MINPACK)
    fit_result = fit.execute()

    self.assertIsInstance(fit_result, FitResults)
    self.assertAlmostEqual(fit_result.value(a), 3.0)
    self.assertAlmostEqual(fit_result.value(b), 2.0)

    self.assertIsInstance(fit_result.stdev(a), float)
    self.assertIsInstance(fit_result.stdev(b), float)

    self.assertIsInstance(fit_result.r_squared, float)
    self.assertEqual(fit_result.r_squared, 1.0)  # by definition since there's no fuzziness
def test_likelihood_fitting_gaussian(self):
    """
    Fit using the likelihood method.
    """
    mu, sig = parameters('mu, sig')
    sig.min = 0.01
    sig.value = 3.0
    mu.value = 50.
    x = Variable()
    pdf = Gaussian(x, mu, sig)

    np.random.seed(10)
    xdata = np.random.normal(51., 3.5, 10000)

    # Expected parameter values
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev / np.sqrt(len(xdata))

    fit = Fit(pdf, xdata, objective=LogLikelihood)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(mu) / mean, 1, 6)
    self.assertAlmostEqual(fit_result.stdev(mu) / mean_stdev, 1, 3)
    self.assertAlmostEqual(fit_result.value(sig) / np.std(xdata), 1, 6)
def test_likelihood_fitting_exponential(self):
    """
    Fit using the likelihood method.
    """
    b = Parameter(value=4, min=3.0)
    x, y = variables('x, y')
    pdf = {y: Exp(x, 1/b)}

    # Draw points from an Exp(5) exponential distribution.
    np.random.seed(100)
    xdata = np.random.exponential(5, 1000000)

    # Expected parameter values
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev / np.sqrt(len(xdata))

    with self.assertRaises(NotImplementedError):
        fit = Fit(pdf, x=xdata, sigma_y=2.0, objective=LogLikelihood)
    fit = Fit(pdf, xdata, objective=LogLikelihood)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(b) / mean, 1, 3)
    self.assertAlmostEqual(fit_result.value(b) / stdev, 1, 3)
    self.assertAlmostEqual(fit_result.stdev(b) / mean_stdev, 1, 3)
def test_2D_fitting():
    """
    Makes sure that a scalar model with 2 independent variables has the
    proper signature, and that the fit result is of the correct type.
    """
    xdata = np.random.randint(-10, 11, size=(2, 400))
    zdata = 2.5 * xdata[0]**2 + 7.0 * xdata[1]**2

    a = Parameter('a')
    b = Parameter('b')
    x = Variable('x')
    y = Variable('y')
    new = a * x**2 + b * y**2

    fit = Fit(new, xdata[0], xdata[1], zdata)

    result = fit.model(xdata[0], xdata[1], 2, 3)
    assert isinstance(result, tuple)

    for arg_name, name in zip(('x', 'y', 'a', 'b'),
                              inspect_sig.signature(fit.model).parameters):
        assert arg_name == name

    fit_result = fit.execute()
    assert isinstance(fit_result, FitResults)
def test_fixed_parameters():
    """
    Make sure fixed parameters don't change on fitting
    """
    a, b, c, d = parameters('a, b, c, d')
    x, y = variables('x, y')

    c.value = 4.0
    a.min, a.max = 1.0, 5.0  # Bounds are needed for DifferentialEvolution
    b.min, b.max = 1.0, 5.0
    c.min, c.max = 1.0, 5.0
    d.min, d.max = 1.0, 5.0
    c.fixed = True

    model = Model({y: a * exp(-(x - b)**2 / (2 * c**2)) + d})

    # Generate data
    xdata = np.linspace(0, 100)
    ydata = model(xdata, a=2, b=3, c=2, d=2).y

    for minimizer in subclasses(BaseMinimizer):
        if minimizer is ChainedMinimizer:
            continue
        else:
            fit = Fit(model, x=xdata, y=ydata, minimizer=minimizer)
            fit_result = fit.execute()
            # Should still be 4.0, not 2.0!
            assert 4.0 == fit_result.params['c']
def test_gaussian_fitting():
    """
    Tests fitting to a gaussian function and fit_result.params unpacking.
    """
    xdata = 2 * np.random.rand(10000) - 1  # random between [-1, 1]
    ydata = 5.0 * scipy.stats.norm.pdf(xdata, loc=0.0, scale=1.0)

    x0 = Parameter('x0')
    sig = Parameter('sig')
    A = Parameter('A')
    x = Variable('x')
    g = GradientModel(A * Gaussian(x, x0, sig))

    fit = Fit(g, xdata, ydata)
    assert isinstance(fit.objective, LeastSquares)
    fit_result = fit.execute()

    assert fit_result.value(A) == pytest.approx(5.0)
    assert np.abs(fit_result.value(sig)) == pytest.approx(1.0)
    assert fit_result.value(x0) == pytest.approx(0.0)
    # raise Exception([i for i in fit_result.params])
    sexy = g(x=2.0, **fit_result.params)
    ugly = g(
        x=2.0,
        x0=fit_result.value(x0),
        A=fit_result.value(A),
        sig=fit_result.value(sig),
    )
    assert sexy == ugly
def test_likelihood_fitting_gaussian():
    """
    Fit using the likelihood method.
    """
    mu, sig = parameters('mu, sig')
    sig.min = 0.01
    sig.value = 3.0
    mu.value = 50.
    x = Variable('x')
    pdf = GradientModel(Gaussian(x, mu, sig))

    np.random.seed(10)
    # TODO: Do we really need 1k points?
    xdata = np.random.normal(51., 3.5, 10000)

    # Expected parameter values
    mean = np.mean(xdata)
    stdev = np.std(xdata)
    mean_stdev = stdev / np.sqrt(len(xdata))

    fit = Fit(pdf, xdata, objective=LogLikelihood)
    fit_result = fit.execute()

    assert fit_result.value(mu) == pytest.approx(mean, 1e-6)
    assert fit_result.stdev(mu) == pytest.approx(mean_stdev, 1e-3)
    assert fit_result.value(sig) == pytest.approx(np.std(xdata), 1e-6)
def test_vector_fitting():
    """
    Tests fitting to a 3 component vector valued function, without bounds
    or guesses.
    """
    a, b, c = parameters('a, b, c')
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit = Fit(model=model, a_i=xdata[0], b_i=xdata[1], c_i=xdata[2],
              minimizer=MINPACK)
    fit_result = fit.execute()

    assert fit_result.value(a) == pytest.approx(np.mean(xdata[0]), 1e-5)
    assert fit_result.value(b) == pytest.approx(np.mean(xdata[1]), 1e-4)
    assert fit_result.value(c) == pytest.approx(np.mean(xdata[2]), 1e-5)
def fit_gauss2d(arr):
    Y, X = np.indices(arr.shape)

    total = arr.sum()
    x = (X * arr).sum() / total
    y = (Y * arr).sum() / total
    col = arr[:, int(y)]
    width_x = np.sqrt(
        np.abs((np.arange(col.size) - y)**2 * col).sum() / col.sum())
    row = arr[int(x), :]
    width_y = np.sqrt(
        np.abs((np.arange(row.size) - x)**2 * row).sum() / row.sum())

    base = 0
    idx = np.argmax(arr)
    y_mu, x_mu = np.unravel_index(idx, arr.shape)
    print(arr.max(), x_mu, y_mu, width_x, width_y, base)

    model = model_gauss2d(arr.max(), x_mu, y_mu, width_x, width_y, base,
                          has_base=False)
    fit = Fit(model, z_var=arr, x_var=X, y_var=Y)
    return fit.execute(), fit.model
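# Illustrative-only sketch of calling fit_gauss2d on a synthetic spot (added for
# clarity; it assumes the surrounding module, including model_gauss2d, is
# importable). A 2D Gaussian is generated on a grid and passed in as the image.
def _example_fit_gauss2d():
    yy, xx = np.indices((64, 64))
    # Peak near column 30, row 25, with different widths along each axis.
    arr = 100 * np.exp(-((xx - 30.0)**2 / (2 * 4.0**2)
                         + (yy - 25.0)**2 / (2 * 6.0**2)))
    res, model = fit_gauss2d(arr)
    return res.params, model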
def test_vector_fitting_guess(self):
    """
    Tests fitting to a 3 component vector valued function, with guesses.
    """
    a, b, c = parameters('a, b, c')
    a.value = 10
    b.value = 100
    a_i, b_i, c_i = variables('a_i, b_i, c_i')

    model = {a_i: a, b_i: b, c_i: c}

    xdata = np.array([
        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
    ])

    fit = Fit(
        model=model,
        a_i=xdata[0],
        b_i=xdata[1],
        c_i=xdata[2],
        minimizer=MINPACK
    )
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a), np.mean(xdata[0]), 4)
    self.assertAlmostEqual(fit_result.value(b), np.mean(xdata[1]), 4)
    self.assertAlmostEqual(fit_result.value(c), np.mean(xdata[2]), 4)
def test_fitting():
    """
    Tests fitting with NumericalLeastSquares. Makes sure that the resulting
    objects and values are of the right type, and that the fit_result does
    not have unexpected members.
    """
    xdata = np.linspace(1, 10, 10)
    ydata = 3 * xdata**2

    a = Parameter('a')  # 3.1, min=2.5, max=3.5
    b = Parameter('b')
    x = Variable('x')
    new = a * x**b

    fit = Fit(new, xdata, ydata, minimizer=MINPACK)
    fit_result = fit.execute()

    assert isinstance(fit_result, FitResults)
    assert fit_result.value(a) == pytest.approx(3.0)
    assert fit_result.value(b) == pytest.approx(2.0)

    assert isinstance(fit_result.stdev(a), float)
    assert isinstance(fit_result.stdev(b), float)

    assert isinstance(fit_result.r_squared, float)
    assert fit_result.r_squared == 1.0  # by definition since there's no fuzziness
def modeling(pdata, xdata, sdata, tdata):
    X, S, P, t = variables('X, S, P, t')
    k = Parameter('k', 0.1)
    umax = Parameter('umax', min=0.06, max=0.25)
    Ki = Parameter('Ki', min=10, max=80)
    Ks = Parameter('Ks', min=0.5, max=8)
    Kip = Parameter('Kip', min=10, max=17)
    mx = Parameter('mx', min=0.001, max=0.1)
    alpha = Parameter('alpha', min=0.1, max=2.4)
    beta = Parameter('beta', min=0.001, max=1.2)

    X0 = 0.01
    S0 = 50
    P0 = 0.01

    model_dict = {
        D(X, t): umax * S / (Ks + S) * X,
        D(S, t): -umax * S / (Ks + S) * X,
        D(P, t): umax * S / (Ks + S)
    }

    ode_model_monod = ODEModel(model_dict, initial={
        t: 0.0, X: X0, S: S0, P: P0
    })

    fit = Fit(ode_model_monod, t=tdata, X=xdata, S=sdata, P=pdata)
    fit_result = fit.execute()

    return ode_model_monod, fit_result
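# Hypothetical calling convention for modeling() (added for illustration only):
# tdata is the time axis and xdata/sdata/pdata are biomass, substrate and product
# measurements of equal length. The arrays below are placeholders to show the
# argument order; real measurements would be used in practice.
def _example_modeling():
    tdata = np.linspace(0, 50, 25)
    xdata = np.linspace(0.01, 5, 25)   # biomass X(t), placeholder values
    sdata = np.linspace(50, 10, 25)    # substrate S(t), placeholder values
    pdata = np.linspace(0.01, 20, 25)  # product P(t), placeholder values
    ode_model, result = modeling(pdata, xdata, sdata, tdata)
    print(result)
    return ode_model, result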
def test_constraint_types(self):
    x = Parameter(value=-1.0)
    y = Parameter(value=1.0)
    z = Variable()
    model = Model({z: 2*x*y + 2*x - x**2 - 2*y**2})

    # These types are not allowed constraints.
    for relation in [Lt, Gt, Ne]:
        with self.assertRaises(ModelError):
            Fit(model, constraints=[relation(x, y)])

    # Should execute without problems.
    for relation in [Eq, Ge, Le]:
        Fit(model, constraints=[relation(x, y)])

    fit = Fit(model, constraints=[Le(x, y)])
    # Le should be transformed to Ge
    self.assertIs(fit.constraints[0].constraint_type, Ge)

    # Redo the standard test as a Le
    constraints = [
        Le(- y + 1, 0),  # y - 1 >= 0,
        Eq(x**3 - y, 0),  # x**3 - y == 0,
    ]
    std_constraints = [
        Ge(y - 1, 0),  # y - 1 >= 0,
        Eq(x**3 - y, 0),  # x**3 - y == 0,
    ]

    fit = Fit(- model, constraints=constraints)
    std_fit = Fit(- model, constraints=std_constraints)
    self.assertEqual(fit.constraints[0].constraint_type, Ge)
    self.assertEqual(fit.constraints[1].constraint_type, Eq)
    self.assertEqual(fit.constraints[0].params, [x, y])
    self.assertEqual(fit.constraints[1].params, [x, y])
    self.assertEqual(fit.constraints[0].jacobian_model.params, [x, y])
    self.assertEqual(fit.constraints[1].jacobian_model.params, [x, y])
    self.assertEqual(fit.constraints[0].hessian_model.params, [x, y])
    self.assertEqual(fit.constraints[1].hessian_model.params, [x, y])
    self.assertEqual(fit.constraints[0].__signature__,
                     fit.constraints[1].__signature__)
    fit_result = fit.execute()
    std_result = std_fit.execute()
    self.assertAlmostEqual(fit_result.value(x), std_result.value(x))
    self.assertAlmostEqual(fit_result.value(y), std_result.value(y))
def test_constrainedminimizers(self):
    """
    Compare the different constrained minimizers, to make sure all support
    constraints, and converge to the same answer.
    """
    minimizers = list(subclasses(ScipyConstrainedMinimize))
    x = Parameter('x', value=-1.0)
    y = Parameter('y', value=1.0)
    z = Variable('z')
    model = Model({z: 2 * x * y + 2 * x - x ** 2 - 2 * y ** 2})

    # First we try an unconstrained fit
    results = []
    for minimizer in minimizers:
        fit = Fit(- model, minimizer=minimizer)
        fit_result = fit.execute(tol=1e-15)
        results.append(fit_result)

    # Compare the parameter values.
    for r1, r2 in zip(results[:-1], results[1:]):
        self.assertAlmostEqual(r1.value(x), r2.value(x), 6)
        self.assertAlmostEqual(r1.value(y), r2.value(y), 6)
        np.testing.assert_almost_equal(r1.covariance_matrix,
                                       r2.covariance_matrix)

    constraints = [
        Ge(y - 1, 0),  # y - 1 >= 0,
        Eq(x ** 3 - y, 0),  # x**3 - y == 0,
    ]

    # Constrained fit.
    results = []
    for minimizer in minimizers:
        if minimizer is COBYLA:
            # COBYLA only supports inequality.
            continue
        fit = Fit(- model, constraints=constraints, minimizer=minimizer)
        fit_result = fit.execute(tol=1e-15)
        results.append(fit_result)

    for r1, r2 in zip(results[:-1], results[1:]):
        self.assertAlmostEqual(r1.value(x), r2.value(x), 6)
        self.assertAlmostEqual(r1.value(y), r2.value(y), 6)
        np.testing.assert_almost_equal(r1.covariance_matrix,
                                       r2.covariance_matrix)
def test_interdependency_constrained(self):
    """
    Test a model with interdependent components, and with constraints which
    depend on the Model's output.
    This is done in the MatrixSymbol formalism, using a Tikhonov
    regularization as an example. In this, a matrix inverse has to be
    calculated and is used multiple times. Therefore we split that term off
    into a separate component, so the inverse only has to be computed once
    per model call.

    See https://arxiv.org/abs/1901.05348 for a more detailed background.
    """
    N = Symbol('N', integer=True)
    M = MatrixSymbol('M', N, N)
    W = MatrixSymbol('W', N, N)
    I = MatrixSymbol('I', N, N)
    y = MatrixSymbol('y', N, 1)
    c = MatrixSymbol('c', N, 1)
    a, = parameters('a')
    z, = variables('z')
    i = Idx('i')

    model_dict = {
        W: Inverse(I + M / a ** 2),
        c: - W * y,
        z: sqrt(c.T * c)
    }
    # Sympy currently does not support derivatives of matrix expressions,
    # so we use CallableModel instead of Model.
    model = CallableModel(model_dict)

    # Generate data
    iden = np.eye(2)
    M_mat = np.array([[2, 1], [3, 4]])
    y_vec = np.array([[3], [5]])
    eval_model = model(I=iden, M=M_mat, y=y_vec, a=0.1)
    # Calculate the answers 'manually' so I know it was done properly
    W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2)
    c_manual = - np.atleast_2d(W_manual.dot(y_vec))
    z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual)))

    self.assertEqual(y_vec.shape, (2, 1))
    self.assertEqual(M_mat.shape, (2, 2))
    self.assertEqual(iden.shape, (2, 2))
    self.assertEqual(W_manual.shape, (2, 2))
    self.assertEqual(c_manual.shape, (2, 1))
    self.assertEqual(z_manual.shape, (1, 1))

    np.testing.assert_almost_equal(W_manual, eval_model.W)
    np.testing.assert_almost_equal(c_manual, eval_model.c)
    np.testing.assert_almost_equal(z_manual, eval_model.z)

    fit = Fit(model, z=z_manual, I=iden, M=M_mat, y=y_vec)
    fit_result = fit.execute()

    # See if a == 0.1 was reconstructed properly. Since only a**2 features
    # in the equations, we check for the absolute value. Setting a.min = 0.0
    # is not appreciated by the Minimizer, it seems.
    self.assertAlmostEqual(np.abs(fit_result.value(a)), 0.1)
def test_2_gaussian_2d_fitting(self):
    """
    Tests fitting to a scalar gaussian with 2 independent variables with
    tight bounds.
    """
    mean = (0.3, 0.4)  # x, y mean 0.3, 0.4
    cov = [[0.01**2, 0], [0, 0.01**2]]
    data = np.random.multivariate_normal(mean, cov, 3000000)
    mean = (0.7, 0.8)  # x, y mean 0.7, 0.8
    cov = [[0.01**2, 0], [0, 0.01**2]]
    data_2 = np.random.multivariate_normal(mean, cov, 3000000)
    data = np.vstack((data, data_2))

    # Insert them as y,x here as np f***s up cartesian conventions.
    ydata, xedges, yedges = np.histogram2d(data[:, 1], data[:, 0], bins=100,
                                           range=[[0.0, 1.0], [0.0, 1.0]])
    xcentres = (xedges[:-1] + xedges[1:]) / 2
    ycentres = (yedges[:-1] + yedges[1:]) / 2

    # Make a valid grid to match ydata
    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False)
    # xdata = np.dstack((xx, yy)).T

    x = Variable()
    y = Variable()

    x0_1 = Parameter(0.7, min=0.6, max=0.9)
    sig_x_1 = Parameter(0.1, min=0.0, max=0.2)
    y0_1 = Parameter(0.8, min=0.6, max=0.9)
    sig_y_1 = Parameter(0.1, min=0.0, max=0.2)
    A_1 = Parameter()
    g_1 = A_1 * Gaussian(x, x0_1, sig_x_1) * Gaussian(y, y0_1, sig_y_1)

    x0_2 = Parameter(0.3, min=0.2, max=0.5)
    sig_x_2 = Parameter(0.1, min=0.0, max=0.2)
    y0_2 = Parameter(0.4, min=0.2, max=0.5)
    sig_y_2 = Parameter(0.1, min=0.0, max=0.2)
    A_2 = Parameter()
    g_2 = A_2 * Gaussian(x, x0_2, sig_x_2) * Gaussian(y, y0_2, sig_y_2)

    model = g_1 + g_2
    fit = Fit(model, xx, yy, ydata)
    fit_result = fit.execute()

    self.assertIsInstance(fit.fit, ConstrainedNumericalLeastSquares)

    img = model(x=xx, y=yy, **fit_result.params)
    img_g_1 = g_1(x=xx, y=yy, **fit_result.params)
    img_g_2 = g_2(x=xx, y=yy, **fit_result.params)
    np.testing.assert_array_equal(img, img_g_1 + img_g_2)

    # Equal up to some precision. Not much obviously.
    self.assertAlmostEqual(fit_result.value(x0_1), 0.7, 3)
    self.assertAlmostEqual(fit_result.value(y0_1), 0.8, 3)
    self.assertAlmostEqual(fit_result.value(x0_2), 0.3, 3)
    self.assertAlmostEqual(fit_result.value(y0_2), 0.4, 3)
def test_mexican_hat(self):
    """
    Test that global minimisation finds the global minimum, and doesn't
    affect the value of parameters.
    """
    x = Parameter('x')
    x.min, x.max = -100, 100
    x.value = -2.5
    y = Variable('y')

    model = Model({y: x**4 - 10 * x**2 - x})  # Skewed Mexican hat
    fit = Fit(model, minimizer=[DifferentialEvolution, BFGS])
    fit_result1 = fit.execute(DifferentialEvolution={'seed': 0})

    fit = Fit(model)
    fit_result2 = fit.execute()

    self.assertGreater(fit_result1.value(x), 0)
    self.assertLess(fit_result2.value(x), 0)
def test_global_fitting(self):
    """
    Test a global fitting scenario with datasets of unequal length. In this
    scenario, a quadratic equation is fitted where the constant term is
    shared between the datasets. (e.g. identical background noise)
    """
    x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
    y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')

    # The following vector valued function links all the equations together
    # as stated in the intro.
    model = Model({
        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
    })

    # Generate data from this model
    # xdata = np.linspace(0, 10)
    xdata1 = np.linspace(0, 10)
    xdata2 = xdata1[::2]  # Make the sets of unequal size

    ydata1, ydata2 = model(x_1=xdata1, x_2=xdata2, a_1=101.3, b_1=0.5,
                           a_2=56.3, b_2=1.1111, y0=10.8)
    # Add some noise to make it appear like real data
    np.random.seed(1)
    ydata1 += np.random.normal(0, 2, size=ydata1.shape)
    ydata2 += np.random.normal(0, 2, size=ydata2.shape)

    xdata = [xdata1, xdata2]
    ydata = [ydata1, ydata2]

    # Guesses
    a_1.value = 100
    a_2.value = 50
    b_1.value = 1
    b_2.value = 1
    y0.value = 10

    eval_jac = model.eval_jacobian(x_1=xdata1, x_2=xdata2, a_1=101.3,
                                   b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
    self.assertEqual(len(eval_jac), 2)
    for comp in eval_jac:
        self.assertEqual(len(comp), len(model.params))

    sigma_y = np.concatenate((np.ones(20), [2., 4., 5, 7, 3]))

    fit = Fit(model, x_1=xdata[0], x_2=xdata[1],
              y_1=ydata[0], y_2=ydata[1], sigma_y_2=sigma_y)
    fit_result = fit.execute()

    # fit_curves = model(x_1=xdata[0], x_2=xdata[1], **fit_result.params)
    self.assertAlmostEqual(fit_result.value(y0), 1.061892e+01, 3)
    self.assertAlmostEqual(fit_result.value(a_1), 1.013269e+02, 3)
    self.assertAlmostEqual(fit_result.value(a_2), 5.625694e+01, 3)
    self.assertAlmostEqual(fit_result.value(b_1), 3.362240e-01, 3)
    self.assertAlmostEqual(fit_result.value(b_2), 1.565253e+00, 3)
def test_error_analytical(self):
    """
    Test using a case where the analytical answer is known. Uses both
    symfit and scipy's curve_fit.
    Modeled after:
    http://nbviewer.ipython.org/urls/gist.github.com/taldcroft/5014170/raw/31e29e235407e4913dc0ec403af7ed524372b612/curve_fit.ipynb
    """
    N = 10000
    sigma = 10.0
    xn = np.arange(N, dtype=float)
    # yn = np.zeros_like(xn)
    np.random.seed(10)
    yn = np.random.normal(size=len(xn), scale=sigma)

    a = Parameter()
    y = Variable()
    model = {y: a}

    fit = Fit(model, y=yn, sigma_y=sigma)
    fit_result = fit.execute()

    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn,
                           sigma=sigma, absolute_sigma=True)
    self.assertAlmostEqual(fit_result.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result.stdev(a), np.sqrt(np.diag(pcov))[0], 2)

    fit_no_sigma = Fit(model, yn)
    fit_result_no_sigma = fit_no_sigma.execute()

    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn)
    # With or without sigma, the bestfit params should be in agreement in case of equal weights
    self.assertAlmostEqual(fit_result.value(a), fit_result_no_sigma.value(a), 5)
    # Since symfit is all about absolute errors, the sigma will not be in agreement
    self.assertNotEqual(fit_result.stdev(a), fit_result_no_sigma.stdev(a), 5)
    self.assertAlmostEqual(fit_result_no_sigma.value(a), popt[0], 5)
    self.assertAlmostEqual(fit_result_no_sigma.stdev(a), pcov[0][0]**0.5, 5)

    # Analytical answer for mean of N(0,1):
    mu = 0.0
    sigma_mu = sigma/N**0.5

    self.assertAlmostEqual(fit_result.stdev(a), sigma_mu, 5)
def test_CallableNumericalModel2D(self):
    """
    Apply a CallableNumericalModel to 2D data, to see if it is agnostic to
    data shape.
    """
    shape = (30, 40)

    def function(a, b):
        out = np.ones(shape) * a
        out[15:, :] += b
        return out

    a, b = parameters('a, b')
    y, = variables('y')

    model = CallableNumericalModel({y: function}, [], [a, b])
    data = 15 * np.ones(shape)
    data[15:, :] += 20

    fit = Fit(model, y=data)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a), 15)
    self.assertAlmostEqual(fit_result.value(b), 20)

    def flattened_function(a, b):
        out = np.ones(shape) * a
        out[15:, :] += b
        return out.flatten()

    model = CallableNumericalModel({y: flattened_function}, [], [a, b])
    data = 15 * np.ones(shape)
    data[15:, :] += 20
    data = data.flatten()

    fit = Fit(model, y=data)
    flat_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a), flat_result.value(a))
    self.assertAlmostEqual(fit_result.value(b), flat_result.value(b))

    self.assertAlmostEqual(fit_result.stdev(a), flat_result.stdev(a))
    self.assertAlmostEqual(fit_result.stdev(b), flat_result.stdev(b))

    self.assertAlmostEqual(fit_result.r_squared, flat_result.r_squared)
def test_chained_min(self):
    """Test fitting with a chained minimizer"""
    curvals = [p.value for p in self.model.params]
    fit = Fit(self.model, self.xx, self.yy, self.ydata,
              minimizer=[DifferentialEvolution, BFGS])
    fit_result = fit.execute(
        DifferentialEvolution={'seed': 0, 'tol': 1e-4, 'maxiter': 10}
    )
    self.assertAlmostEqual(fit_result.value(self.x0_1), 0.4, 4)
    self.assertAlmostEqual(fit_result.value(self.y0_1), 0.4, 4)
    self.assertEqual(curvals, [p.value for p in self.model.params])
def test_param_error_analytical(self):
    """
    Take an example in which the parameter errors are known and see if
    `Fit` reproduces them.

    It also needs to support the absolute_sigma argument.
    """
    N = 10000
    sigma = 25.0
    xn = np.arange(N, dtype=float)
    np.random.seed(110)
    yn = np.random.normal(size=xn.shape, scale=sigma)

    a = Parameter()
    y = Variable('y')
    model = {y: a}

    constr_fit = Fit(model, y=yn, sigma_y=sigma)
    constr_result = constr_fit.execute()

    fit = Fit(model, y=yn, sigma_y=sigma, minimizer=MINPACK)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a), constr_result.value(a), 5)
    self.assertAlmostEqual(fit_result.stdev(a), constr_result.stdev(a), 5)

    # Analytical answer for mean of N(0,sigma):
    sigma_mu = sigma/N**0.5

    self.assertAlmostEqual(fit_result.value(a), np.mean(yn), 5)
    self.assertAlmostEqual(fit_result.stdev(a), sigma_mu, 5)

    # Compare for absolute_sigma = False.
    constr_fit = Fit(model, y=yn, sigma_y=sigma, absolute_sigma=False)
    constr_result = constr_fit.execute()

    fit = Fit(model, y=yn, sigma_y=sigma, minimizer=MINPACK,
              absolute_sigma=False)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(a), constr_result.value(a), 5)
    self.assertAlmostEqual(fit_result.stdev(a), constr_result.stdev(a), 5)
def test_likelihood_fitting_bivariate_gaussian(self):
    """
    Fit using the likelihood method.
    """
    # Make variables and parameters
    x = Variable('x')
    y = Variable('y')
    x0 = Parameter('x0', value=0.6, min=0.5, max=0.7)
    sig_x = Parameter('sig_x', value=0.1, max=1.0)
    y0 = Parameter('y0', value=0.7, min=0.6, max=0.9)
    sig_y = Parameter('sig_y', value=0.05, max=1.0)
    rho = Parameter('rho', value=0.001, min=-1, max=1)

    pdf = BivariateGaussian(x=x, mu_x=x0, sig_x=sig_x, y=y, mu_y=y0,
                            sig_y=sig_y, rho=rho)

    # Draw 100000 samples from a bivariate distribution
    mean = [0.59, 0.8]
    r = 0.6
    cov = np.array([[0.11 ** 2, 0.11 * 0.23 * r],
                    [0.11 * 0.23 * r, 0.23 ** 2]])
    np.random.seed(42)
    xdata, ydata = np.random.multivariate_normal(mean, cov, 100000).T

    fit = Fit(pdf, x=xdata, y=ydata, objective=LogLikelihood)
    fit_result = fit.execute()

    self.assertAlmostEqual(fit_result.value(x0) / mean[0], 1, 2)
    self.assertAlmostEqual(fit_result.value(y0) / mean[1], 1, 2)
    self.assertAlmostEqual(fit_result.value(sig_x) / np.sqrt(cov[0, 0]), 1, 2)
    self.assertAlmostEqual(fit_result.value(sig_y) / np.sqrt(cov[1, 1]), 1, 2)
    self.assertAlmostEqual(fit_result.value(rho) / r, 1, 2)

    marginal = integrate(pdf, (y, -oo, oo), conds='none')
    fit = Fit(marginal, x=xdata, objective=LogLikelihood)

    with self.assertRaises(NameError):
        # Should raise a NameError, not a TypeError, see #219
        fit.execute()
def test_named_fitting(self):
    xdata = np.linspace(1, 10, 10)
    ydata = 3 * xdata**2

    a = Parameter('a', 1.0)
    b = Parameter('b', 2.5)
    x, y = variables('x, y')

    model = {y: a * x**b}

    fit = Fit(model, x=xdata, y=ydata)
    fit_result = fit.execute()
    self.assertIsInstance(fit_result, FitResults)
    self.assertAlmostEqual(fit_result.value(a), 3.0, 3)
    self.assertAlmostEqual(fit_result.value(b), 2.0, 4)
def test_diff_evo(self):
    """
    Tests fitting to a scalar gaussian with 2 independent variables with
    wide bounds.
    """
    fit = Fit(self.model, self.xx, self.yy, self.ydata, minimizer=BFGS)
    fit_result = fit.execute()
    self.assertIsInstance(fit.minimizer, BFGS)

    # Make sure a local optimizer doesn't find the answer.
    self.assertNotAlmostEqual(fit_result.value(self.x0_1), 0.4, 1)
    self.assertNotAlmostEqual(fit_result.value(self.y0_1), 0.4, 1)

    # On to the main event
    fit = Fit(self.model, self.xx, self.yy, self.ydata,
              minimizer=DifferentialEvolution)
    fit_result = fit.execute(polish=True, seed=0, tol=1e-4, maxiter=100)
    # Global minimizers are really bad at finding local minima though, so
    # roughly equal is good enough.
    self.assertAlmostEqual(fit_result.value(self.x0_1), 0.4, 1)
    self.assertAlmostEqual(fit_result.value(self.y0_1), 0.4, 1)
def test_boundaries(self):
    """
    Make sure parameter boundaries are respected
    """
    x = Parameter('x', min=1)
    y = Variable('y')
    model = Model({y: x**2})

    bounded_minimizers = list(subclasses(BoundedMinimizer))
    for minimizer in bounded_minimizers:
        fit = Fit(model, minimizer=minimizer)
        if minimizer is DifferentialEvolution:
            # Also needs a max
            x.max = 10
            fit_result = fit.execute()
            x.max = None
        elif minimizer is MINPACK:
            pass  # Not a MINPACKable problem because it only has a param
        else:
            fit_result = fit.execute()
            self.assertGreaterEqual(fit_result.value(x), 1.0)
            self.assertLessEqual(fit_result.value(x), 2.0)
        self.assertEqual(fit.minimizer.bounds, [(1, None)])
def test_MatrixSymbolModel(self):
    """
    Test a model which is defined by MatrixSymbols, see #194
    """
    N = Symbol('N', integer=True)
    M = MatrixSymbol('M', N, N)
    W = MatrixSymbol('W', N, N)
    I = MatrixSymbol('I', N, N)
    y = MatrixSymbol('y', N, 1)
    c = MatrixSymbol('c', N, 1)
    a, b = parameters('a, b')
    z, x = variables('z, x')

    model_dict = {
        W: Inverse(I + M / a ** 2),
        c: - W * y,
        z: sqrt(c.T * c)
    }
    # TODO: This should be a Model in the future, but sympy is not yet
    # capable of computing Matrix derivatives at the time of writing.
    model = CallableModel(model_dict)

    self.assertEqual(model.params, [a])
    self.assertEqual(model.independent_vars, [I, M, y])
    self.assertEqual(model.dependent_vars, [z])
    self.assertEqual(model.interdependent_vars, [W, c])
    self.assertEqual(model.connectivity_mapping,
                     {W: {I, M, a}, c: {W, y}, z: {c}})

    # Generate data
    iden = np.eye(2)
    M_mat = np.array([[2, 1], [3, 4]])
    y_vec = np.array([3, 5])

    eval_model = model(I=iden, M=M_mat, y=y_vec, a=0.1)
    W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2)
    c_manual = - W_manual.dot(y_vec)
    z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual)))
    np.testing.assert_allclose(eval_model.W, W_manual)
    np.testing.assert_allclose(eval_model.c, c_manual)
    np.testing.assert_allclose(eval_model.z, z_manual)

    # Now try to retrieve the value of `a` from a fit
    a.value = 0.2
    fit = Fit(model, z=z_manual, I=iden, M=M_mat, y=y_vec)
    fit_result = fit.execute()
    eval_model = model(I=iden, M=M_mat, y=y_vec, **fit_result.params)
    self.assertAlmostEqual(0.1, np.abs(fit_result.value(a)))
    np.testing.assert_allclose(eval_model.W, W_manual, rtol=1e-5)
    np.testing.assert_allclose(eval_model.c, c_manual, rtol=1e-5)
    np.testing.assert_allclose(eval_model.z, z_manual, rtol=1e-5)
def test_pickle(self):
    xdata = np.linspace(1, 10, 10)
    ydata = 3 * xdata ** 2

    a = Parameter('a')  # 3.1, min=2.5, max=3.5
    b = Parameter('b')
    x = Variable('x')
    y = Variable('y')
    new = {y: a * x ** b}

    fit = Fit(new, x=xdata, y=ydata)
    fit_result = fit.execute()
    new_result = pickle.loads(pickle.dumps(fit_result))
    self.assertEqual(fit_result.__dict__.keys(), new_result.__dict__.keys())