def as_gaussians(pfh):
    """Return the three gaussians encoded in the parameter vector ``pfh.x``.

    Each gaussian is built as ``curve_fitting.gaussian(a, b, c)`` where
    a is the amplitude, b the mean and c the sigma.  The entries of
    ``pfh.x`` are used as follows:

      x[0]  mean of the first gaussian
      x[1]  amplitude of the first gaussian
      x[2]  sigma of the first gaussian
      x[3]  amplitude of the second gaussian
      x[4]  amplitude of the third gaussian
      x[5]  sigma of the third gaussian

    The means of the second and third gaussians are offset from x[0] by
    x[2] * constants[0] and x[2] * constants[1] respectively, where
    ``constants`` is taken from the enclosing scope.
    """
    p = pfh.x

    first = curve_fitting.gaussian(a=p[1], b=p[0], c=p[2])

    # The sigma of the middle gaussian lies between the first sigma (x[2])
    # and the third sigma (x[5]), weighted by constants[0] / constants[1].
    second = curve_fitting.gaussian(
        a=p[3],
        b=p[0] + p[2] * constants[0],
        c=(constants[0] / constants[1]) * (p[5] - p[2]) + p[2])

    third = curve_fitting.gaussian(
        a=p[4],
        b=p[0] + p[2] * constants[1],
        c=p[5])

    return [first, second, third]
Beispiel #2
0
 def as_gaussians(pfh):
     """Return the two gaussians encoded in the parameter vector ``pfh.x``.

     x[0], x[1] and x[2] are the mean (b), amplitude (a) and sigma (c) of
     the first gaussian; x[3] is the amplitude of the second.  The second
     gaussian's mean is x[0] + x[2] * GAIN_TO_SIGMA and its sigma is
     x[2] * SIGMAFAC.
     """
     p = pfh.x
     first = curve_fitting.gaussian(a=p[1], b=p[0], c=p[2])
     second = curve_fitting.gaussian(
         a=p[3],
         b=p[0] + p[2] * GAIN_TO_SIGMA,
         c=p[2] * SIGMAFAC)
     return [first, second]
  def single_peak_fit(self, hist, lower_threshold, upper_threshold, mean,
                      zero_peak_gaussian=None):
    """Fit a single gaussian to the part of ``hist`` between two thresholds.

    The fit window is the slice ``[lower_slot:upper_slot]`` of the histogram,
    where each bound is the number of slots visited up to and including the
    first slot centre that exceeds the corresponding threshold (or the total
    number of slots if no centre exceeds it).

    hist: object providing ``slot_centers()`` and ``slots()``.
    lower_threshold, upper_threshold: values compared against slot centres
      to locate the fit window.
    mean: starting value for the mean (b) of the fitted gaussian.
    zero_peak_gaussian: optional callable; when given, its value at the slot
      centres is subtracted from the counts before fitting.

    Returns the minimiser object produced by ``curve_fitting``: the lbfgs
    result when the fitted sigma lies in [1, 10] or when the window holds
    fewer than 15 counts, otherwise the cma_es result.
    """
    x = hist.slot_centers()
    y = hist.slots().as_double()

    def slots_up_to(threshold):
      # Count the slots up to and including the first centre above threshold.
      count = 0
      for centre in x:
        count += 1
        if centre > threshold: break
      return count

    lower_slot = slots_up_to(lower_threshold)
    upper_slot = slots_up_to(upper_threshold)

    # mamin: the starting amplitude (a) is the maximum count inside the fit
    # window; it is taken before any zero-peak subtraction below.
    starting_gaussians = [curve_fitting.gaussian(
      a=flex.max(y[lower_slot:upper_slot]), b=mean, c=3)]
    if zero_peak_gaussian is not None:
      y -= zero_peak_gaussian(x)

    x_window = x[lower_slot:upper_slot]
    y_window = y[lower_slot:upper_slot]

    fit = curve_fitting.lbfgs_minimiser(starting_gaussians, x_window, y_window)
    sigma = abs(fit.functions[0].params[2])
    if not (sigma < 1 or sigma > 10):
      return fit

    # NOTE(review): an earlier comment claimed this threshold had been changed
    # from 15 to 5, but the code compares against 15 - confirm which is meant.
    if flex.sum(y_window) < 15:
      # No point wasting time attempting to fit a gaussian if there aren't
      # any counts; hand back the lbfgs result as it is.
      return fit

    # Written so that it prints the same text under Python 2 and Python 3.
    print("using cma_es: %s" % sigma)
    return curve_fitting.cma_es_minimiser(
      starting_gaussians, x_window, y_window)
def exercise_savitzky_golay_smoothing():
  """Check savitzky_golay_filter on noisy gaussians at several noise levels.

  For each noise fraction a gaussian with random mean, scale and sigma is
  sampled on x in [-20, 20) with step 0.1, normally distributed noise is
  added, and the filtered curve is compared against the noise-free one.
  Raises AssertionError if any of the limits is exceeded.
  """

  plot = False  # flip to True to inspect the curves with matplotlib

  def root_mean_square(values):
    return math.sqrt(flex.mean(flex.pow2(values)))

  for sigma_frac in (0.005, 0.01, 0.05, 0.1):
    centre = random.randint(-5,5)
    amplitude = flex.random_double() * 10
    width = flex.random_double() * 5 + 1
    gaussian = curve_fitting.gaussian(amplitude, centre, width)

    x = flex.double(frange(-20,20,0.1))
    y_clean = gaussian(x)

    # Noise sigma is a fixed fraction of the largest absolute curve value.
    noise_distribution = scitbx.random.normal_distribution(
      mean=0, sigma=sigma_frac*flex.max_absolute(y_clean))
    generator = scitbx.random.variate(noise_distribution)
    noise = generator(y_clean.size())
    y_noisy = y_clean + noise

    # according to numerical recipes the best results are obtained where the
    # full window width is between 1 and 2 times the number of points at fwhm
    # for polynomials of degree 4
    # (2.355 * sigma is the fwhm; the factor 10 converts it to a number of
    # points for the 0.1 step used above)
    half_window = int(round(0.5 * 2.355 * width * 10))
    y_smoothed = savitzky_golay_filter(
      x, y_noisy, half_window=half_window, degree=4)[1]
    removed_noise = y_noisy - y_smoothed

    # Both the generated noise and the noise removed by the filter should
    # have an rms within 15% of the requested sigma.
    requested_sigma = noise_distribution.sigma
    for observed_rms in (root_mean_square(noise),
                         root_mean_square(removed_noise)):
      assert is_below_limit(
        value=abs(requested_sigma - observed_rms)/requested_sigma,
        limit=0.15)

    # The smoothed curve should stay close to the noise-free one.
    deviation = y_smoothed - y_clean
    assert is_below_limit(
      value=(root_mean_square(deviation)/ noise_distribution.sigma),
      limit=0.4)

    if plot:
      from matplotlib import pyplot
      pyplot.plot(x, y_clean)
      pyplot.plot(x, noise)
      pyplot.scatter(x, y_noisy, marker="x")
      pyplot.plot(x, y_smoothed)
      pyplot.show()
      pyplot.plot(x, removed_noise)
      pyplot.plot(x, noise)
      pyplot.show()
    def single_peak_fit(self,
                        hist,
                        lower_threshold,
                        upper_threshold,
                        mean,
                        zero_peak_gaussian=None):
        """Fit a single gaussian to the part of ``hist`` between two thresholds.

        The fit window is the slice ``[lower_slot:upper_slot]`` of the
        histogram, where each bound is the number of slots visited up to and
        including the first slot centre that exceeds the corresponding
        threshold (or the total number of slots if no centre exceeds it).

        hist: object providing ``slot_centers()`` and ``slots()``.
        lower_threshold, upper_threshold: values compared against slot
          centres to locate the fit window.
        mean: starting value for the mean (b) of the fitted gaussian.
        zero_peak_gaussian: optional callable; when given, its value at the
          slot centres is subtracted from the counts before fitting.

        Returns the minimiser object produced by ``curve_fitting``: the lbfgs
        result when the fitted sigma lies in [1, 10] or when the window holds
        fewer than 15 counts, otherwise the cma_es result.
        """
        x = hist.slot_centers()
        y = hist.slots().as_double()

        def slots_up_to(threshold):
            # Count the slots up to and including the first centre above
            # threshold.
            count = 0
            for centre in x:
                count += 1
                if centre > threshold: break
            return count

        lower_slot = slots_up_to(lower_threshold)
        upper_slot = slots_up_to(upper_threshold)

        # mamin: the starting amplitude (a) is the maximum count inside the
        # fit window; it is taken before any zero-peak subtraction below.
        starting_gaussians = [
            curve_fitting.gaussian(a=flex.max(y[lower_slot:upper_slot]),
                                   b=mean,
                                   c=3)
        ]
        if zero_peak_gaussian is not None:
            y -= zero_peak_gaussian(x)

        x_window = x[lower_slot:upper_slot]
        y_window = y[lower_slot:upper_slot]

        fit = curve_fitting.lbfgs_minimiser(starting_gaussians, x_window,
                                            y_window)
        sigma = abs(fit.functions[0].params[2])
        if not (sigma < 1 or sigma > 10):
            return fit

        # NOTE(review): an earlier comment claimed this threshold had been
        # changed from 15 to 5, but the code compares against 15 - confirm
        # which is meant.
        if flex.sum(y_window) < 15:
            # No point wasting time attempting to fit a gaussian if there
            # aren't any counts; hand back the lbfgs result as it is.
            return fit

        # Written so that it prints the same text under Python 2 and 3.
        print("using cma_es: %s" % sigma)
        return curve_fitting.cma_es_minimiser(starting_gaussians, x_window,
                                              y_window)
 def as_gaussians(pfh):
   """Return the two gaussians encoded in the parameter vector ``pfh.x``.

   x[0], x[1] and x[2] are the mean (b), amplitude (a) and sigma (c) of the
   first gaussian; x[3] is the amplitude of the second.  The second
   gaussian's mean is x[0] + x[2] * GAIN_TO_SIGMA and its sigma is
   x[2] * SIGMAFAC.
   """
   p = pfh.x
   first = curve_fitting.gaussian(a=p[1], b=p[0], c=p[2])
   second = curve_fitting.gaussian(
     a=p[3],
     b=p[0] + p[2] * GAIN_TO_SIGMA,
     c=p[2] * SIGMAFAC)
   return [first, second]
def exercise_gaussian_fit():
    """Exercise the gaussian fitting routines of ``curve_fitting``.

    Covers, in order:
      * analytical partial derivatives of a gaussian against finite
        differences, and single_gaussian_fit, for ten random gaussians;
      * recovering mu and sigma from a parabola fitted to log(gaussian);
      * gaussian_fit on a sum of two gaussians;
      * gaussian_fit (and cma_es_minimiser, when the cma_es module is
        available) on a sum of five gaussians.

    Raises AssertionError if any check fails.
    """

    def sample_points(mu, sigma):
        # Grid spanning mu +/- 6 sigma with a step of 1/1000 of that range.
        start = mu - 6 * sigma
        stop = mu + 6 * sigma
        step = (stop - start) / 1000
        return flex.double(frange(start, stop, step))

    def sum_of_gaussians(gaussians, x):
        # Accumulate every gaussian evaluated at x into a fresh array.
        total = flex.double(x.size())
        for g in gaussians:
            total += g(x)
        return total

    # test fitting of a gaussian
    def do_gaussian_fit(scale, mu, sigma):
        x = sample_points(mu, sigma)
        y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
        fit = curve_fitting.single_gaussian_fit(x, y)
        assert approx_equal(fit.a, scale, 1e-4)
        assert approx_equal(fit.b, mu, eps=1e-4)
        assert approx_equal(fit.c, sigma, eps=1e-4)

    for _ in range(10):
        scale = random.random() * 1000
        sigma = (random.random() + 0.0001) * 10
        mu = (-1)**random.randint(0, 1) * random.random() * 1000
        functor = curve_fitting.gaussian(scale, mu, sigma)
        x = sample_points(mu, sigma)
        fd_grads = finite_differences(functor, x)
        assert approx_equal(functor.partial_derivatives(x), fd_grads, 1e-4)
        do_gaussian_fit(scale, mu, sigma)

    # if we take the log of a gaussian we can fit a parabola
    scale = 123
    mu = 3.2
    sigma = 0.1
    x = flex.double(frange(2, 4, 0.01))
    y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
    # need to be careful to only use values of y > 0
    eps = 1e-15
    kept = [(xi, yi) for xi, yi in zip(x, y) if yi > eps]
    x = flex.double([xi for xi, _ in kept])
    y = flex.double([yi for _, yi in kept])
    fit = curve_fitting.univariate_polynomial_fit(x, flex.log(y), degree=2)
    # log(y) = log(scale) - (x - mu)**2 / (2 sigma**2), so with a the
    # quadratic and b the linear coefficient:
    #   a = -1 / (2 sigma**2)   and   b = mu / sigma**2
    _, b, a = fit.params
    assert approx_equal(mu, -b / (2 * a))
    assert approx_equal(sigma * sigma, -1 / (2 * a))

    # test multiple gaussian fits
    gaussians = [
        curve_fitting.gaussian(0.3989538, 3.7499764, 0.7500268),
        curve_fitting.gaussian(0.7978957, 6.0000004, 0.5000078)
    ]
    x = flex.double(frange(0, 10, 0.1))
    y = sum_of_gaussians(gaussians, x)

    starting_gaussians = [
        curve_fitting.gaussian(1, 4, 1),
        curve_fitting.gaussian(1, 5, 1)
    ]
    fit = curve_fitting.gaussian_fit(x, y, starting_gaussians)
    for expected, fitted in zip(gaussians, fit.gaussians):
        assert approx_equal(expected.a, fitted.a, eps=1e-4)
        assert approx_equal(expected.b, fitted.b, eps=1e-4)
        assert approx_equal(expected.c, fitted.c, eps=1e-4)

    # use example of 5-gaussian fit from here:
    # http://research.stowers-institute.org/efg/R/Statistics/MixturesOfDistributions/index.htm
    gaussians = [
        curve_fitting.gaussian(0.10516252, 23.32727, 2.436638),
        curve_fitting.gaussian(0.46462715, 33.09053, 2.997594),
        curve_fitting.gaussian(0.29827916, 41.27244, 4.274585),
        curve_fitting.gaussian(0.08986616, 51.24468, 5.077521),
        curve_fitting.gaussian(0.04206501, 61.31818, 7.070303)
    ]

    x = flex.double(frange(0, 80, 0.1))
    y = sum_of_gaussians(gaussians, x)

    termination_params = scitbx.lbfgs.termination_parameters(
        min_iterations=500)
    starting_gaussians = [
        curve_fitting.gaussian(1, 21, 2.1),
        curve_fitting.gaussian(1, 30, 2.8),
        curve_fitting.gaussian(1, 40, 2.2),
        curve_fitting.gaussian(1, 51, 1.2),
        curve_fitting.gaussian(1, 60, 2.3)
    ]
    fit = curve_fitting.gaussian_fit(x,
                                     y,
                                     starting_gaussians,
                                     termination_params=termination_params)
    # Only the reproduced curve is checked here, not individual parameters.
    y_calc = fit.compute_y_calc()
    assert approx_equal(y, y_calc, eps=1e-2)

    # The cma_es check runs only when that module is available.
    have_cma_es = libtbx.env.has_module("cma_es")
    if have_cma_es:
        fit = curve_fitting.cma_es_minimiser(starting_gaussians, x, y)
        y_calc = fit.compute_y_calc()
        assert approx_equal(y, y_calc, eps=5e-2)
def exercise_gaussian_fit():
  """Exercise the gaussian fitting routines of ``curve_fitting``.

  Covers, in order:
    * analytical partial derivatives of a gaussian against finite
      differences, and single_gaussian_fit, for ten random gaussians;
    * recovering mu and sigma from a parabola fitted to log(gaussian);
    * gaussian_fit on a sum of two gaussians;
    * gaussian_fit (and cma_es_minimiser, when the cma_es module is
      available) on a sum of five gaussians.

  Raises AssertionError if any check fails.
  """

  def sample_points(mu, sigma):
    # Grid spanning mu +/- 6 sigma with a step of 1/1000 of that range.
    start = mu - 6 * sigma
    stop = mu + 6 * sigma
    step = (stop - start)/1000
    return flex.double(frange(start, stop, step))

  def sum_of_gaussians(gaussians, x):
    # Accumulate every gaussian evaluated at x into a fresh array.
    total = flex.double(x.size())
    for g in gaussians:
      total += g(x)
    return total

  # test fitting of a gaussian
  def do_gaussian_fit(scale, mu, sigma):
    x = sample_points(mu, sigma)
    y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
    fit = curve_fitting.single_gaussian_fit(x, y)
    assert approx_equal(fit.a, scale, 1e-4)
    assert approx_equal(fit.b, mu, eps=1e-4)
    assert approx_equal(fit.c, sigma, eps=1e-4)

  for _ in range(10):
    scale = random.random() * 1000
    sigma = (random.random() + 0.0001) * 10
    mu = (-1)**random.randint(0,1) * random.random() * 1000
    functor = curve_fitting.gaussian(scale, mu, sigma)
    x = sample_points(mu, sigma)
    fd_grads = finite_differences(functor, x)
    assert approx_equal(functor.partial_derivatives(x), fd_grads, 1e-4)
    do_gaussian_fit(scale, mu, sigma)

  # if we take the log of a gaussian we can fit a parabola
  scale = 123
  mu = 3.2
  sigma = 0.1
  x = flex.double(frange(2, 4, 0.01))
  y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
  # need to be careful to only use values of y > 0
  eps = 1e-15
  kept = [(xi, yi) for xi, yi in zip(x, y) if yi > eps]
  x = flex.double([xi for xi, _ in kept])
  y = flex.double([yi for _, yi in kept])
  fit = curve_fitting.univariate_polynomial_fit(x, flex.log(y), degree=2)
  # log(y) = log(scale) - (x - mu)**2 / (2 sigma**2), so with a the
  # quadratic and b the linear coefficient:
  #   a = -1 / (2 sigma**2)   and   b = mu / sigma**2
  _, b, a = fit.params
  assert approx_equal(mu, -b/(2*a))
  assert approx_equal(sigma*sigma, -1/(2*a))

  # test multiple gaussian fits
  gaussians = [curve_fitting.gaussian(0.3989538, 3.7499764, 0.7500268),
               curve_fitting.gaussian(0.7978957, 6.0000004, 0.5000078)]
  x = flex.double(frange(0, 10, 0.1))
  y = sum_of_gaussians(gaussians, x)

  starting_gaussians = [
    curve_fitting.gaussian(1, 4, 1),
    curve_fitting.gaussian(1, 5, 1)]
  fit = curve_fitting.gaussian_fit(x, y, starting_gaussians)
  for expected, fitted in zip(gaussians, fit.gaussians):
    assert approx_equal(expected.a, fitted.a, eps=1e-4)
    assert approx_equal(expected.b, fitted.b, eps=1e-4)
    assert approx_equal(expected.c, fitted.c, eps=1e-4)

  # use example of 5-gaussian fit from here:
  # http://research.stowers-institute.org/efg/R/Statistics/MixturesOfDistributions/index.htm
  gaussians = [curve_fitting.gaussian(0.10516252, 23.32727, 2.436638),
               curve_fitting.gaussian(0.46462715, 33.09053, 2.997594),
               curve_fitting.gaussian(0.29827916, 41.27244, 4.274585),
               curve_fitting.gaussian(0.08986616, 51.24468, 5.077521),
               curve_fitting.gaussian(0.04206501, 61.31818, 7.070303)]

  x = flex.double(frange(0, 80, 0.1))
  y = sum_of_gaussians(gaussians, x)

  termination_params = scitbx.lbfgs.termination_parameters(
    min_iterations=500)
  starting_gaussians = [curve_fitting.gaussian(1, 21, 2.1),
                        curve_fitting.gaussian(1, 30, 2.8),
                        curve_fitting.gaussian(1, 40, 2.2),
                        curve_fitting.gaussian(1, 51, 1.2),
                        curve_fitting.gaussian(1, 60, 2.3)]
  fit = curve_fitting.gaussian_fit(
    x, y, starting_gaussians, termination_params=termination_params)
  # Only the reproduced curve is checked here, not individual parameters.
  y_calc = fit.compute_y_calc()
  assert approx_equal(y, y_calc, eps=1e-2)

  # The cma_es check runs only when that module is available.
  have_cma_es = libtbx.env.has_module("cma_es")
  if have_cma_es:
    fit = curve_fitting.cma_es_minimiser(starting_gaussians, x, y)
    y_calc = fit.compute_y_calc()
    assert approx_equal(y, y_calc, eps=5e-2)