Example #1
def p_of_none_greater(z, n):
    # Probability calculation for the maximum of n standard normal samples.
    from scitbx.math import erf
    x = z / (2**0.5)
    p_one_greater = 0.5 * (1 - erf(x))  # one-tailed prob of Z > z
    p_not_one_greater = 1 - p_one_greater  # prob Z is not > z
    p_all_not_greater = p_not_one_greater**n  # prob all n not > z
    p_at_least_one_greater = 1 - p_all_not_greater  # prob at least one > z
    # NB: despite the function name, this returns P(at least one > z),
    # the complement of P(none greater).
    return p_at_least_one_greater
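The same calculation can be checked without scitbx: math.erf in the
Python standard library is numerically equivalent here. A minimal
standalone sketch (the function name is ours):

import math

def p_at_least_one_greater(z, n):
    # One-tailed tail probability of a standard normal: P(Z > z).
    p_one = 0.5 * (1.0 - math.erf(z / math.sqrt(2.0)))
    # P(all n independent samples stay at or below z).
    p_none = (1.0 - p_one) ** n
    return 1.0 - p_none

# For z = 2 and n = 10 there is roughly a 20% chance that at least
# one of ten independent samples exceeds two sigma.
print(p_at_least_one_greater(2.0, 10))  # ~0.206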
Example #2
    def single_level_curve(self, gi, rgi, bi, rgcfi, pi, q=None):
        # force scale factors and radii to be non-negative
        gi = abs(gi)
        rgi = abs(rgi)
        bi = abs(bi)
        rgcfi = abs(rgcfi)
        if q is None:
            q = self.q
        # Guinier term plus an erf-damped power-law term (unified fit)
        result = gi * flex.exp(-q * q * rgi * rgi / 3.0) + bi * flex.exp(
            -q * q * rgcfi * rgcfi / 3.0) * flex.pow(
                flex.pow(sm.erf(q * rgi * self.cnst), 3.0) /
                (q + self.eps), pi)
        return result
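flex and sm here are cctbx array and math modules. Outside cctbx the
same curve can be sketched with NumPy and scipy.special.erf; this is a
plain translation in which q is a 1-D array and the cnst and eps
defaults (self.cnst and self.eps in the class) are assumptions chosen
for illustration:

import numpy as np
from scipy.special import erf

def single_level_curve(q, gi, rgi, bi, rgcfi, pi,
                       eps=1e-16, cnst=1.0 / 6.0**0.5):
    gi, rgi, bi, rgcfi = abs(gi), abs(rgi), abs(bi), abs(rgcfi)
    # Guinier term.
    guinier = gi * np.exp(-q * q * rgi * rgi / 3.0)
    # erf-damped power-law term; eps guards against division by zero.
    power_law = bi * np.exp(-q * q * rgcfi * rgcfi / 3.0) * (
        erf(q * rgi * cnst) ** 3.0 / (q + eps)) ** pi
    return guinier + power_law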
Example #3
    def extreme_wilson_outliers(self,
                                p_extreme_wilson=1e-1,
                                return_data=False):

        n_acentric = self.acentric_work.data().size()
        n_centric = self.centric_work.data().size()

        extreme_acentric = 1.0 - flex.pow(
            1.0 - flex.exp(-self.acentric_work.data()), float(n_acentric))
        extreme_centric = 1.0 - flex.pow(
            erf(flex.sqrt(self.centric_work.data() / 2.0)), float(n_centric))

        acentric_selection = flex.bool(extreme_acentric > p_extreme_wilson)
        centric_selection = flex.bool(extreme_centric > p_extreme_wilson)
        all_flags = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(
                self.centric_work.indices()),
            data=acentric_selection.concatenate(centric_selection))
        all_p_values = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(
                self.centric_work.indices()),
            data=extreme_acentric.concatenate(extreme_centric))
        all_flags = all_flags.common_set(self.miller_obs)
        all_p_values = all_p_values.common_set(self.miller_obs)

        log_string = """
Outlier rejection based on extreme value Wilson statistics.
-----------------------------------------------------------

Reflections whose normalized intensities have an associated p-value
lower than %s are flagged as possible outliers.
The p-values are obtained from the extreme value distribution of the
Wilson distribution.
    """ % (p_extreme_wilson)

        log_string = self.make_log_wilson(log_string, all_flags, all_p_values)

        print >> self.out
        print >> self.out, log_string
        print >> self.out

        if not return_data:
            return all_flags
        else:
            return self.miller_obs.select(all_flags.data())
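The order statistic behind these formulas is simple: if a single
normalized intensity has cumulative distribution F(z), the largest of
N independent reflections exceeds z with probability 1 - F(z)**N. A
standalone numeric sketch using only the standard library:

import math

def extreme_p_acentric(z, n):
    # Acentric Wilson CDF: F(z) = 1 - exp(-z).
    return 1.0 - (1.0 - math.exp(-z)) ** n

def extreme_p_centric(z, n):
    # Centric Wilson CDF: F(z) = erf(sqrt(z / 2)).
    return 1.0 - math.erf(math.sqrt(z / 2.0)) ** n

# A normalized intensity of 10 is unremarkable once 10000 acentric
# reflections have been observed:
print(extreme_p_acentric(10.0, 10000))  # ~0.365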
Example #6
    def basic_wilson_outliers(self, p_basic_wilson=1E-6, return_data=False):
        # note: 1 - (1 - exp(-z)) is just exp(-z); the long form mirrors
        # the extreme value variant used elsewhere
        p_acentric_single = 1.0 - (1.0 - flex.exp(-self.acentric_work.data()))
        p_centric_single = 1.0 - erf(flex.sqrt(self.centric_work.data() / 2.0))

        acentric_selection = flex.bool(p_acentric_single > p_basic_wilson)
        centric_selection = flex.bool(p_centric_single > p_basic_wilson)

        # combine all in a single miller array
        all_flags = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(
                self.centric_work.indices()),
            data=acentric_selection.concatenate(centric_selection))
        all_p_values = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(
                self.centric_work.indices()),
            data=p_acentric_single.concatenate(p_centric_single))

        # get the order right
        all_flags = all_flags.common_set(self.miller_obs)
        all_p_values = all_p_values.common_set(self.miller_obs)

        # prepare a table with results please
        log_string = """
Outlier rejection based on basic Wilson statistics.
--------------------------------------------------

See Read, Acta Cryst. (1999), D55, 1759-1764 for details.
Reflections whose normalized intensities have an associated p-value
lower than %s are flagged as possible outliers.
    """ % (p_basic_wilson)

        log_string = self.make_log_wilson(log_string, all_flags, all_p_values)
        print >> self.out
        print >> self.out, log_string
        print >> self.out

        if not return_data:
            return all_flags
        else:
            return self.miller_obs.select(all_flags.data())
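For a single reflection the survival probabilities have closed forms:
exp(-z) for acentrics and 1 - erf(sqrt(z / 2)) for centrics. A quick
standalone check of what the default cutoff of 1e-6 means in practice:

import math

def p_single(z, centric):
    # Probability that one normalized intensity exceeds z under the
    # corresponding Wilson distribution.
    if centric:
        return 1.0 - math.erf(math.sqrt(z / 2.0))
    return math.exp(-z)

# An acentric reflection is flagged once its normalized intensity
# exceeds -ln(1e-6), i.e. about 13.8:
print(-math.log(1e-6))                # ~13.8
print(p_single(13.8, centric=False))  # ~1e-6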
Example #7
def test_luts():
  # Compare the lookup-table approximations against the exact
  # scitbx.math implementations, then time both code paths.
  qerf = mmtbx.scaling.very_quick_erf(0.001)
  qeio = mmtbx.scaling.quick_ei0(5000)
  for i in xrange(-1000,1000):
    x=i/100.0
    assert approx_equal( qerf.erf(x), sm.erf(x), eps=1e-5 )
    if (x>=0):
      assert approx_equal( qeio.ei0(x), math.exp(-x)*sm.bessel_i0(x) , eps=1e-5 )
  number_of_iterations = 15000000
  for optimized in [False, True]:
    t0 = time.time()
    zero = qerf.loop_for_timings(number_of_iterations, optimized=optimized)
    print "very_quick_erf*%d optimized=%s: %.2f s" % (
      number_of_iterations, str(optimized), time.time()-t0)
    assert approx_equal(zero, 0)
  number_of_iterations = 5000000
  for optimized in [False, True]:
    t0 = time.time()
    zero = qeio.loop_for_timings(number_of_iterations, optimized=optimized)
    print "quick_ei0*%d optimized=%s: %.2f s" % (
      number_of_iterations, str(optimized), time.time()-t0)
    assert approx_equal(zero, 0)
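very_quick_erf trades memory for speed: it tabulates erf on a fixed
grid and interpolates between grid points. A minimal pure-Python
sketch of the same idea (the class name is ours; the 0.001 spacing
mirrors the constructor argument above):

import math

class table_erf(object):
    """Linear-interpolation lookup table for erf, illustration only."""

    def __init__(self, step=0.001, x_max=6.0):
        self.step = step
        n = int(x_max / step) + 2
        self.table = [math.erf(i * step) for i in range(n)]

    def erf(self, x):
        ax = abs(x)
        if ax >= (len(self.table) - 2) * self.step:
            return math.copysign(1.0, x)  # erf saturates beyond ~6
        i, frac = divmod(ax / self.step, 1.0)
        i = int(i)
        y = self.table[i] * (1.0 - frac) + self.table[i + 1] * frac
        return math.copysign(y, x)

terf = table_erf()
assert abs(terf.erf(0.5) - math.erf(0.5)) < 1e-6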
Example #9
    def model_based_outliers(self,
                             f_model,
                             level=.01,
                             return_data=False,
                             plot_out=None):

        assert self.r_free_flags is not None
        if (self.r_free_flags.data().count(True) == 0):
            self.r_free_flags = self.r_free_flags.array(
                data=~self.r_free_flags.data())
        sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
            miller_obs=self.miller_obs,
            miller_calc=f_model,
            r_free_flags=self.r_free_flags,
            kernel_width_free_reflections=200,
            n_sampling_points=20,
            n_chebyshev_terms=13)

        sigmaa_estimator.show(out=self.out)
        sigmaa = sigmaa_estimator.sigmaa()
        obs_norm = abs(sigmaa_estimator.normalized_obs)
        calc_norm = sigmaa_estimator.normalized_calc

        f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
            f_obs=obs_norm.data(),
            sigma_obs=None,
            f_calc=calc_norm.data(),
            # the data are prenormalized, so all epsilons are unity
            epsilon=flex.double(calc_norm.data().size(), 1.0),
            centric=obs_norm.centric_flags().data(),
            alpha=sigmaa.data(),
            beta=1.0 - sigmaa.data() * sigmaa.data())
        modes = f_model_outlier_object.posterior_mode()
        lik = f_model_outlier_object.log_likelihood()
        p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
        s_der = f_model_outlier_object.posterior_mode_snd_der()

        ll_gain = f_model_outlier_object.standardized_likelihood()

        # The smallest value should be 0, but numerical noise can make
        # it slightly negative; clamp it at eps.
        eps = 1.0e-10
        zeros = flex.bool(ll_gain < eps)
        p_values = ll_gain
        p_values = p_values.set_selected(zeros, eps)
        p_values = erf(flex.sqrt(p_values / 2.0))
        p_values = 1.0 - flex.pow(p_values, float(p_values.size()))

        # select on p-values
        flags = flex.bool(p_values > level)
        flags = self.miller_obs.customized_copy(data=flags)
        ll_gain = self.miller_obs.customized_copy(data=ll_gain)
        p_values = self.miller_obs.customized_copy(data=p_values)

        log_message = """

Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood difference 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed
according to a Chi-square distribution (Q\" is equal to the second
derivative of the log likelihood function at the mode of the
distribution).
The outlier threshold of the p-value relates to the p-value of the
extreme value distribution of the chi-square distribution.

"""

        flags.map_to_asu()
        ll_gain.map_to_asu()
        p_values.map_to_asu()

        assert flags.indices().all_eq(self.miller_obs.indices())
        assert ll_gain.indices().all_eq(self.miller_obs.indices())
        assert p_values.indices().all_eq(self.miller_obs.indices())

        log_message = self.make_log_model(log_message, flags, ll_gain,
                                          p_values, obs_norm, calc_norm,
                                          sigmaa, plot_out)
        tmp_log = StringIO()
        print >> tmp_log, log_message
        # histogram of log likelihood gain values
        print >> tmp_log
        print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
        print >> tmp_log, "  Note: scaled (LL-gain) is approximately Chi-square distributed."
        print >> tmp_log
        print >> tmp_log, "  scaled(LL-gain)  Frequency"
        histo = flex.histogram(ll_gain.data(), 15)
        histo.show(f=tmp_log, format_cutoffs='%7.3f')

        print >> self.out, tmp_log.getvalue()

        if not return_data:
            return flags
        else:
            assert flags.indices().all_eq(self.miller_obs.indices())
            return self.miller_obs.select(flags.data())
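The p-value transform above relies on erf(sqrt(x / 2)) being the CDF
of a chi-square variable with one degree of freedom, so raising it to
the number of reflections again gives an extreme value probability. A
standalone sketch of the same transform (the function name is ours):

import math

def model_based_p_value(ll_gain, n_refl):
    x = max(ll_gain, 1.0e-10)  # clamp, as in the code above
    # Chi-square (1 df) CDF of the scaled log-likelihood gain.
    cdf = math.erf(math.sqrt(x / 2.0))
    # Probability that at least one of n_refl reflections shows a
    # gain this large by chance.
    return 1.0 - cdf ** n_refl

print(model_based_p_value(30.0, 10000))  # ~4e-4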