from __future__ import division

import math
import random

import libtbx.load_env
from libtbx.test_utils import approx_equal, is_below_limit
from libtbx.utils import frange
import scitbx.lbfgs
import scitbx.random
from scitbx.array_family import flex
from scitbx.math import curve_fitting
from scitbx.smoothing import savitzky_golay_filter


def exercise_polynomial_fit():

  def do_polynomial_fit(x, params):
    n_terms = len(params)
    y = flex.double(x.size())
    for i in range(len(params)):
      y += params[i] * flex.pow(x, i)
    fit = curve_fitting.univariate_polynomial_fit(x, y, degree=n_terms-1)
    assert approx_equal(params, fit.params, eps=1e-4)

  x = flex.double(range(-50, 50))
  do_polynomial_fit(x, (2, 3, 5))  # y = 2 + 3x + 5x^2
  do_polynomial_fit(x, (-0.0002, -1000))  # y = -0.0002 - 1000x

  # random polynomials of degree 0 through 4: check the analytical partial
  # derivatives against finite differences, then fit
  for n_terms in range(1, 6):
    params = [100 * random.random() for i in range(n_terms)]
    x = flex.double(
      frange(-random.randint(1, 10), random.randint(1, 10), 0.1))
    functor = curve_fitting.univariate_polynomial(*params)
    fd_grads = finite_differences(functor, x)
    assert approx_equal(functor.partial_derivatives(x), fd_grads, 1e-4)
    do_polynomial_fit(x, params)
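
# Helper used by the gradient checks in exercise_polynomial_fit and
# exercise_gaussian_fit. Its definition is not part of this excerpt, so this
# is a minimal sketch: it assumes each curve_fitting functor exposes a
# .params sequence and can be rebuilt as functor.__class__(*params), and that
# the returned list (one gradient array per parameter) matches the layout of
# functor.partial_derivatives(x).
def finite_differences(functor, x, eps=1e-4):
  grads = []
  for i in range(len(functor.params)):
    params = list(functor.params)
    params[i] += eps
    y_plus = functor.__class__(*params)(x)
    params[i] -= 2 * eps
    y_minus = functor.__class__(*params)(x)
    # central-difference estimate of dy/d(param_i) evaluated at each x
    grads.append((y_plus - y_minus) / (2 * eps))
  return grads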
def exercise_savitzky_golay_smoothing():
  plot = False

  def rms(flex_double):
    return math.sqrt(flex.mean(flex.pow2(flex_double)))

  for sigma_frac in (0.005, 0.01, 0.05, 0.1):
    mean = random.randint(-5, 5)
    scale = flex.random_double() * 10
    sigma = flex.random_double() * 5 + 1
    gaussian = curve_fitting.gaussian(scale, mean, sigma)

    x = flex.double(frange(-20, 20, 0.1))
    y = gaussian(x)
    rand_norm = scitbx.random.normal_distribution(
      mean=0, sigma=sigma_frac*flex.max_absolute(y))
    g = scitbx.random.variate(rand_norm)
    noise = g(y.size())
    y_noisy = y + noise
    # According to Numerical Recipes the best results are obtained where the
    # full window width is between 1 and 2 times the number of points at fwhm
    # for polynomials of degree 4
    half_window = int(round(0.5 * 2.355 * sigma * 10))
    y_filtered = savitzky_golay_filter(
      x, y_noisy, half_window=half_window, degree=4)[1]
    extracted_noise = y_noisy - y_filtered
    rms_noise = rms(noise)
    rms_extracted_noise = rms(extracted_noise)

    assert is_below_limit(
      value=abs(rand_norm.sigma - rms_noise)/rand_norm.sigma,
      limit=0.15)
    assert is_below_limit(
      value=abs(rand_norm.sigma - rms_extracted_noise)/rand_norm.sigma,
      limit=0.15)

    diff = y_filtered - y
    assert is_below_limit(
      value=(rms(diff)/rand_norm.sigma),
      limit=0.4)

    if plot:
      from matplotlib import pyplot
      pyplot.plot(x, y)
      pyplot.plot(x, noise)
      pyplot.scatter(x, y_noisy, marker="x")
      pyplot.plot(x, y_filtered)
      pyplot.show()
      pyplot.plot(x, extracted_noise)
      pyplot.plot(x, noise)
      pyplot.show()
def exercise_gaussian_fit():

  # test fitting of a gaussian
  def do_gaussian_fit(scale, mu, sigma):
    start = mu - 6 * sigma
    stop = mu + 6 * sigma
    step = (stop - start)/1000
    x = flex.double(frange(start, stop, step))
    y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
    fit = curve_fitting.single_gaussian_fit(x, y)
    assert approx_equal(fit.a, scale, 1e-4)
    assert approx_equal(fit.b, mu, eps=1e-4)
    assert approx_equal(fit.c, sigma, eps=1e-4)

  for i in range(10):
    scale = random.random() * 1000
    sigma = (random.random() + 0.0001) * 10
    mu = (-1)**random.randint(0, 1) * random.random() * 1000
    functor = curve_fitting.gaussian(scale, mu, sigma)
    start = mu - 6 * sigma
    stop = mu + 6 * sigma
    step = (stop - start)/1000
    x = flex.double(frange(start, stop, step))
    fd_grads = finite_differences(functor, x)
    assert approx_equal(functor.partial_derivatives(x), fd_grads, 1e-4)
    do_gaussian_fit(scale, mu, sigma)

  # if we take the log of a gaussian we can fit a parabola
  scale = 123
  mu = 3.2
  sigma = 0.1
  x = flex.double(frange(2, 4, 0.01))
  y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
  # need to be careful to only use values of y > 0
  eps = 1e-15
  x = flex.double([x[i] for i in range(x.size()) if y[i] > eps])
  y = flex.double([y[i] for i in range(y.size()) if y[i] > eps])
  fit = curve_fitting.univariate_polynomial_fit(x, flex.log(y), degree=2)
  c, b, a = fit.params
  assert approx_equal(mu, -b/(2*a))
  assert approx_equal(sigma*sigma, -1/(2*a))

  # test multiple gaussian fits
  gaussians = [curve_fitting.gaussian(0.3989538, 3.7499764, 0.7500268),
               curve_fitting.gaussian(0.7978957, 6.0000004, 0.5000078)]
  x = flex.double(frange(0, 10, 0.1))
  y = flex.double(x.size())
  for i in range(len(gaussians)):
    g = gaussians[i]
    scale, mu, sigma = g.a, g.b, g.c
    y += g(x)
  starting_gaussians = [
    curve_fitting.gaussian(1, 4, 1),
    curve_fitting.gaussian(1, 5, 1)]
  fit = curve_fitting.gaussian_fit(x, y, starting_gaussians)
  for g1, g2 in zip(gaussians, fit.gaussians):
    assert approx_equal(g1.a, g2.a, eps=1e-4)
    assert approx_equal(g1.b, g2.b, eps=1e-4)
    assert approx_equal(g1.c, g2.c, eps=1e-4)

  # use example of 5-gaussian fit from here:
  # http://research.stowers-institute.org/efg/R/Statistics/MixturesOfDistributions/index.htm
  gaussians = [curve_fitting.gaussian(0.10516252, 23.32727, 2.436638),
               curve_fitting.gaussian(0.46462715, 33.09053, 2.997594),
               curve_fitting.gaussian(0.29827916, 41.27244, 4.274585),
               curve_fitting.gaussian(0.08986616, 51.24468, 5.077521),
               curve_fitting.gaussian(0.04206501, 61.31818, 7.070303)]
  x = flex.double(frange(0, 80, 0.1))
  y = flex.double(x.size())
  for i in range(len(gaussians)):
    g = gaussians[i]
    scale, mu, sigma = g.a, g.b, g.c
    y += g(x)
  termination_params = scitbx.lbfgs.termination_parameters(
    min_iterations=500)
  starting_gaussians = [curve_fitting.gaussian(1, 21, 2.1),
                        curve_fitting.gaussian(1, 30, 2.8),
                        curve_fitting.gaussian(1, 40, 2.2),
                        curve_fitting.gaussian(1, 51, 1.2),
                        curve_fitting.gaussian(1, 60, 2.3)]
  fit = curve_fitting.gaussian_fit(
    x, y, starting_gaussians, termination_params=termination_params)
  y_calc = fit.compute_y_calc()
  assert approx_equal(y, y_calc, eps=1e-2)

  have_cma_es = libtbx.env.has_module("cma_es")
  if have_cma_es:
    fit = curve_fitting.cma_es_minimiser(starting_gaussians, x, y)
    y_calc = fit.compute_y_calc()
    assert approx_equal(y, y_calc, eps=5e-2)
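
# Convenience runner so the module can be executed directly; a sketch only,
# since the original test harness is not shown in this excerpt.
def run():
  exercise_polynomial_fit()
  exercise_savitzky_golay_smoothing()
  exercise_gaussian_fit()
  print("OK")


if __name__ == "__main__":
  run()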