def p_of_none_greater(z, n):
    """Return the probability that at least one of ``n`` draws exceeds ``z``.

    A single draw is given the one-tailed normal probability
    ``p = 0.5 * erfc(z / sqrt(2))`` of being greater than ``z``; the value
    returned is ``1 - (1 - p)**n``.

    NOTE: despite the function's name, the returned value is the probability
    that *at least one* draw is greater than ``z`` (i.e. the complement of
    "none greater").  This matches the historical behaviour and is kept so
    existing callers are unaffected.

    Args:
        z: Threshold (a scalar number).
        n: Number of draws.

    Returns:
        ``1 - (1 - p)**n`` as a float.
    """
    from math import erfc, expm1, log1p

    # erfc avoids the cancellation in 1 - erf(x) when the tail is tiny.
    p_one_greater = 0.5 * erfc(z / (2 ** 0.5))  # one-tailed prob of Z > z
    if p_one_greater < 0.5:
        # For small p, (1 - p) rounds to 1.0 and 1 - (1 - p)**n collapses to
        # zero; evaluate 1 - exp(n * log(1 - p)) with log1p/expm1 instead.
        # "0.0 - ..." rather than unary minus so n == 0 yields +0.0.
        return 0.0 - expm1(n * log1p(-p_one_greater))
    # For p >= 0.5 the direct formula is accurate and also covers p == 1,
    # where log1p(-p) would be undefined.
    p_not_one_greater = 1 - p_one_greater  # prob Z is not > z
    return 1 - p_not_one_greater ** n  # prob at least one > z
def single_level_curve(self, gi, rgi, bi, rgcfi, pi, q=None):
    """Evaluate a single level of the model curve on ``q``.

    Only the magnitudes of ``gi``, ``rgi``, ``bi`` and ``rgcfi`` are used.
    The returned curve is the sum of two terms:

      * ``gi * exp(-q^2 * rgi^2 / 3)``
      * ``bi * exp(-q^2 * rgcfi^2 / 3)
        * (erf(q * rgi * self.cnst)^3 / (q + self.eps))^pi``

    ``self.q`` is used when ``q`` is None.
    """
    gi, rgi, bi, rgcfi = abs(gi), abs(rgi), abs(bi), abs(rgcfi)
    if q is None:
        q = self.q

    minus_q_squared = -q * q
    exponential_term = gi * flex.exp(minus_q_squared * rgi * rgi / 3.0)
    cutoff = flex.exp(minus_q_squared * rgcfi * rgcfi / 3.0)
    # self.eps is added to q in the denominator of the power-law base.
    power_base = flex.pow(sm.erf(q * rgi * self.cnst), 3.0) / (q + self.eps)
    power_term = bi * cutoff * flex.pow(power_base, pi)
    return exponential_term + power_term
def extreme_wilson_outliers(self, p_extreme_wilson=1e-1, return_data=False):
    """Flag reflections using extreme-value Wilson statistics.

    For each value ``E`` in ``self.acentric_work`` and ``self.centric_work``
    a p-value ``1 - F(E)**N`` is computed, where ``N`` is the number of
    values in that class and ``F(E)`` is ``1 - exp(-E)`` for the acentric
    set and ``erf(sqrt(E / 2))`` for the centric set.  A flag is True when
    the p-value is greater than ``p_extreme_wilson``; per the log text,
    entries with a lower p-value are the possible outliers.

    A report produced by ``self.make_log_wilson`` is written to ``self.out``.

    Args:
        p_extreme_wilson: p-value threshold.
        return_data: When False, return the flag array; otherwise return
            ``self.miller_obs`` restricted to the entries flagged True.
    """
    acentric_data = self.acentric_work.data()
    centric_data = self.centric_work.data()

    extreme_acentric = 1.0 - flex.pow(
        1.0 - flex.exp(-acentric_data), float(acentric_data.size()))
    extreme_centric = 1.0 - flex.pow(
        erf(flex.sqrt(centric_data / 2.0)), float(centric_data.size()))

    acentric_selection = flex.bool(extreme_acentric > p_extreme_wilson)
    centric_selection = flex.bool(extreme_centric > p_extreme_wilson)

    # Acentric entries first, then centric, for indices and data alike.
    combined_indices = self.acentric_work.indices().concatenate(
        self.centric_work.indices())
    all_flags = self.work_obs.customized_copy(
        indices=combined_indices,
        data=acentric_selection.concatenate(centric_selection))
    all_p_values = self.work_obs.customized_copy(
        indices=combined_indices,
        data=extreme_acentric.concatenate(extreme_centric))

    # Bring both arrays onto the entries they share with self.miller_obs.
    all_flags = all_flags.common_set(self.miller_obs)
    all_p_values = all_p_values.common_set(self.miller_obs)

    log_string = """ Outlier rejection based on extreme value Wilson statistics. ----------------------------------------------------------- Reflections whose normalized intensity have an associated p-value lower than %s are flagged as possible outliers. The p-value is obtained using extreme value distributions of the Wilson distribution. """ % (p_extreme_wilson)
    log_string = self.make_log_wilson(log_string, all_flags, all_p_values)

    # Blank line, report, blank line.  Uses write() instead of the
    # Python-2-only "print >> stream" statement so this runs on 2 and 3.
    self.out.write("\n%s\n\n" % log_string)

    if return_data:
        return self.miller_obs.select(all_flags.data())
    return all_flags
def extreme_wilson_outliers(self, p_extreme_wilson=1e-1, return_data=False):
    """Flag reflections using extreme-value Wilson statistics.

    For each value ``E`` in ``self.acentric_work`` and ``self.centric_work``
    a p-value ``1 - F(E)**N`` is computed, where ``N`` is the number of
    values in that class and ``F(E)`` is ``1 - exp(-E)`` for the acentric
    set and ``erf(sqrt(E / 2))`` for the centric set.  A flag is True when
    the p-value is greater than ``p_extreme_wilson``; per the log text,
    entries with a lower p-value are the possible outliers.

    A report produced by ``self.make_log_wilson`` is written to ``self.out``.

    Args:
        p_extreme_wilson: p-value threshold.
        return_data: When False, return the flag array; otherwise return
            ``self.miller_obs`` restricted to the entries flagged True.
    """
    acentric_data = self.acentric_work.data()
    centric_data = self.centric_work.data()

    extreme_acentric = 1.0 - flex.pow(
        1.0 - flex.exp(-acentric_data), float(acentric_data.size()))
    extreme_centric = 1.0 - flex.pow(
        erf(flex.sqrt(centric_data / 2.0)), float(centric_data.size()))

    acentric_selection = flex.bool(extreme_acentric > p_extreme_wilson)
    centric_selection = flex.bool(extreme_centric > p_extreme_wilson)

    # Acentric entries first, then centric, for indices and data alike.
    combined_indices = self.acentric_work.indices().concatenate(
        self.centric_work.indices())
    all_flags = self.work_obs.customized_copy(
        indices=combined_indices,
        data=acentric_selection.concatenate(centric_selection))
    all_p_values = self.work_obs.customized_copy(
        indices=combined_indices,
        data=extreme_acentric.concatenate(extreme_centric))

    # Bring both arrays onto the entries they share with self.miller_obs.
    all_flags = all_flags.common_set(self.miller_obs)
    all_p_values = all_p_values.common_set(self.miller_obs)

    log_string = """ Outlier rejection based on extreme value Wilson statistics. ----------------------------------------------------------- Reflections whose normalized intensity have an associated p-value lower than %s are flagged as possible outliers. The p-value is obtained using extreme value distributions of the Wilson distribution. """ % (p_extreme_wilson)
    log_string = self.make_log_wilson(log_string, all_flags, all_p_values)

    # Blank line, report, blank line.  Uses write() instead of the
    # Python-2-only "print >> stream" statement so this runs on 2 and 3.
    self.out.write("\n%s\n\n" % log_string)

    if return_data:
        return self.miller_obs.select(all_flags.data())
    return all_flags
def basic_wilson_outliers(self, p_basic_wilson=1e-6, return_data=False):
    """Flag reflections using single-observation Wilson statistics.

    For each value ``E`` the p-value is ``exp(-E)`` for the entries of
    ``self.acentric_work`` and ``1 - erf(sqrt(E / 2))`` for the entries of
    ``self.centric_work``.  A flag is True when the p-value is greater than
    ``p_basic_wilson``; per the log text, entries with a lower p-value are
    the possible outliers.

    A report produced by ``self.make_log_wilson`` is written to ``self.out``.

    Args:
        p_basic_wilson: p-value threshold.
        return_data: When False, return the flag array; otherwise return
            ``self.miller_obs`` restricted to the entries flagged True.
    """
    # Mathematically 1 - (1 - exp(-E)); evaluated directly because the
    # double subtraction rounds p-values below ~1e-16 to exactly zero.
    p_acentric_single = flex.exp(-self.acentric_work.data())
    p_centric_single = 1.0 - erf(flex.sqrt(self.centric_work.data() / 2.0))

    acentric_selection = flex.bool(p_acentric_single > p_basic_wilson)
    centric_selection = flex.bool(p_centric_single > p_basic_wilson)

    # Combine everything in a single miller array: acentric entries first,
    # then centric, for indices and data alike.
    combined_indices = self.acentric_work.indices().concatenate(
        self.centric_work.indices())
    all_flags = self.work_obs.customized_copy(
        indices=combined_indices,
        data=acentric_selection.concatenate(centric_selection))
    all_p_values = self.work_obs.customized_copy(
        indices=combined_indices,
        data=p_acentric_single.concatenate(p_centric_single))

    # Get the order right: restrict to what is shared with self.miller_obs.
    all_flags = all_flags.common_set(self.miller_obs)
    all_p_values = all_p_values.common_set(self.miller_obs)

    # Prepare a table with the results.
    log_string = """ Outlier rejection based on basic Wilson statistics. -------------------------------------------------- See Read, Acta Cryst. (1999). D55, 1759-1764. for details. Reflections whose normalized intensity have an associated p-value lower than %s are flagged as possible outliers. """ % (p_basic_wilson)
    log_string = self.make_log_wilson(log_string, all_flags, all_p_values)

    # Blank line, report, blank line.  Uses write() instead of the
    # Python-2-only "print >> stream" statement so this runs on 2 and 3.
    self.out.write("\n%s\n\n" % log_string)

    if return_data:
        return self.miller_obs.select(all_flags.data())
    return all_flags
def basic_wilson_outliers(self, p_basic_wilson=1E-6, return_data=False):
    """Flag reflections using single-observation Wilson statistics.

    For each value ``E`` the p-value is ``exp(-E)`` for the entries of
    ``self.acentric_work`` and ``1 - erf(sqrt(E / 2))`` for the entries of
    ``self.centric_work``.  A flag is True when the p-value is greater than
    ``p_basic_wilson``; per the log text, entries with a lower p-value are
    the possible outliers.

    A report produced by ``self.make_log_wilson`` is written to ``self.out``.

    Args:
        p_basic_wilson: p-value threshold.
        return_data: When False, return the flag array; otherwise return
            ``self.miller_obs`` restricted to the entries flagged True.
    """
    # Mathematically 1 - (1 - exp(-E)); evaluated directly because the
    # double subtraction rounds p-values below ~1e-16 to exactly zero.
    p_acentric_single = flex.exp(-self.acentric_work.data())
    p_centric_single = 1.0 - erf(flex.sqrt(self.centric_work.data() / 2.0))

    acentric_selection = flex.bool(p_acentric_single > p_basic_wilson)
    centric_selection = flex.bool(p_centric_single > p_basic_wilson)

    # Combine everything in a single miller array: acentric entries first,
    # then centric, for indices and data alike.
    combined_indices = self.acentric_work.indices().concatenate(
        self.centric_work.indices())
    all_flags = self.work_obs.customized_copy(
        indices=combined_indices,
        data=acentric_selection.concatenate(centric_selection))
    all_p_values = self.work_obs.customized_copy(
        indices=combined_indices,
        data=p_acentric_single.concatenate(p_centric_single))

    # Get the order right: restrict to what is shared with self.miller_obs.
    all_flags = all_flags.common_set(self.miller_obs)
    all_p_values = all_p_values.common_set(self.miller_obs)

    # Prepare a table with the results.
    log_string = """ Outlier rejection based on basic Wilson statistics. -------------------------------------------------- See Read, Acta Cryst. (1999). D55, 1759-1764. for details. Reflections whose normalized intensity have an associated p-value lower than %s are flagged as possible outliers. """ % (p_basic_wilson)
    log_string = self.make_log_wilson(log_string, all_flags, all_p_values)

    # Blank line, report, blank line.  Uses write() instead of the
    # Python-2-only "print >> stream" statement so this runs on 2 and 3.
    self.out.write("\n%s\n\n" % log_string)

    if return_data:
        return self.miller_obs.select(all_flags.data())
    return all_flags
def test_luts():
    """Check the lookup-table erf and exp(-x)*I0(x) routines, then time them.

    ``very_quick_erf.erf`` is compared with ``sm.erf`` for x = -10.00 ... 9.99
    in steps of 0.01, and ``quick_ei0.ei0`` with ``exp(-x) * sm.bessel_i0(x)``
    for the non-negative part of that grid, both to within 1e-5.  Afterwards
    ``loop_for_timings`` is run for each table with ``optimized`` False and
    True; the elapsed time is printed and the returned value must be ~0.
    """
    qerf = mmtbx.scaling.very_quick_erf(0.001)
    qeio = mmtbx.scaling.quick_ei0(5000)

    # range() instead of xrange(): 2000 items, and valid on Python 2 and 3.
    for i in range(-1000, 1000):
        x = i / 100.0
        assert approx_equal(qerf.erf(x), sm.erf(x), eps=1e-5)
        if x >= 0:
            assert approx_equal(
                qeio.ei0(x), math.exp(-x) * sm.bessel_i0(x), eps=1e-5)

    def run_timings(lut, message, number_of_iterations):
        # Time lut.loop_for_timings with and without the optimized path.
        for optimized in [False, True]:
            t0 = time.time()
            zero = lut.loop_for_timings(
                number_of_iterations, optimized=optimized)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(message % (
                number_of_iterations, str(optimized), time.time() - t0))
            assert approx_equal(zero, 0)

    run_timings(qerf, "very_quick_erf*%d optimized=%s: %.2f s", 15000000)
    run_timings(qeio, "quick_ei0*%d optimized=%s: %.2f s", 5000000)
def test_luts():
    """Check the lookup-table erf and exp(-x)*I0(x) routines, then time them.

    ``very_quick_erf.erf`` is compared with ``sm.erf`` for x = -10.00 ... 9.99
    in steps of 0.01, and ``quick_ei0.ei0`` with ``exp(-x) * sm.bessel_i0(x)``
    for the non-negative part of that grid, both to within 1e-5.  Afterwards
    ``loop_for_timings`` is run for each table with ``optimized`` False and
    True; the elapsed time is printed and the returned value must be ~0.
    """
    qerf = mmtbx.scaling.very_quick_erf(0.001)
    qeio = mmtbx.scaling.quick_ei0(5000)

    # range() instead of xrange(): 2000 items, and valid on Python 2 and 3.
    for i in range(-1000, 1000):
        x = i / 100.0
        assert approx_equal(qerf.erf(x), sm.erf(x), eps=1e-5)
        if x >= 0:
            assert approx_equal(
                qeio.ei0(x), math.exp(-x) * sm.bessel_i0(x), eps=1e-5)

    def run_timings(lut, message, number_of_iterations):
        # Time lut.loop_for_timings with and without the optimized path.
        for optimized in [False, True]:
            t0 = time.time()
            zero = lut.loop_for_timings(
                number_of_iterations, optimized=optimized)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(message % (
                number_of_iterations, str(optimized), time.time() - t0))
            assert approx_equal(zero, 0)

    run_timings(qerf, "very_quick_erf*%d optimized=%s: %.2f s", 15000000)
    run_timings(qeio, "quick_ei0*%d optimized=%s: %.2f s", 5000000)
def model_based_outliers(self, f_model, level=.01, return_data=False, plot_out=None):
    """Flag reflections by a likelihood-ratio test against ``f_model``.

    A ``sigmaa_estimator`` is built from ``self.miller_obs``, ``f_model`` and
    ``self.r_free_flags``; its normalized observed/calculated values and
    sigmaA estimates feed ``scaling.likelihood_ratio_outlier_test``.  The
    standardized likelihood ``g`` of each reflection is turned into a p-value
    ``1 - erf(sqrt(g / 2))**N`` (``N`` = number of reflections), and a flag
    is True when that p-value is greater than ``level``.

    A report from ``self.make_log_model`` plus a histogram of the
    standardized likelihood values is written to ``self.out``.

    Args:
        f_model: Calculated data passed to the estimator as ``miller_calc``.
        level: p-value threshold.
        return_data: When False, return the flag array; otherwise return
            ``self.miller_obs`` restricted to the entries flagged True.
        plot_out: Forwarded unchanged to ``self.make_log_model``.
    """
    assert self.r_free_flags is not None
    if self.r_free_flags.data().count(True) == 0:
        # No entry is flagged True: replace the flags by their complement.
        self.r_free_flags = self.r_free_flags.array(
            data=~self.r_free_flags.data())

    sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
        miller_obs=self.miller_obs,
        miller_calc=f_model,
        r_free_flags=self.r_free_flags,
        kernel_width_free_reflections=200,
        n_sampling_points=20,
        n_chebyshev_terms=13)
    sigmaa_estimator.show(out=self.out)
    sigmaa = sigmaa_estimator.sigmaa()
    obs_norm = abs(sigmaa_estimator.normalized_obs)
    calc_norm = sigmaa_estimator.normalized_calc

    f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
        f_obs=obs_norm.data(),
        sigma_obs=None,
        f_calc=calc_norm.data(),
        # the data is prenormalized, all epsilons are unity
        epsilon=flex.double(calc_norm.data().size(), 1.0),
        centric=obs_norm.centric_flags().data(),
        alpha=sigmaa.data(),
        beta=1.0 - sigmaa.data() * sigmaa.data())

    # NOTE(review): these four results are never used below; the calls are
    # kept in case the accessors have side effects -- confirm and remove.
    modes = f_model_outlier_object.posterior_mode()
    lik = f_model_outlier_object.log_likelihood()
    p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
    s_der = f_model_outlier_object.posterior_mode_snd_der()

    ll_gain = f_model_outlier_object.standardized_likelihood()

    # The smallest value should be 0, but numerical noise can make it come
    # out slightly negative; clamp everything below eps up to eps.
    # NOTE(review): set_selected presumably modifies ll_gain in place, so
    # the clamped values would also be what is reported below -- confirm.
    eps = 1.0e-10
    below_eps = flex.bool(ll_gain < eps)
    clamped_gain = ll_gain.set_selected(below_eps, eps)
    single_cdf = erf(flex.sqrt(clamped_gain / 2.0))
    p_values = 1.0 - flex.pow(single_cdf, float(single_cdf.size()))

    # select on p-values
    flags = flex.bool(p_values > level)
    flags = self.miller_obs.customized_copy(data=flags)
    ll_gain = self.miller_obs.customized_copy(data=ll_gain)
    p_values = self.miller_obs.customized_copy(data=p_values)

    log_message = """ Model based outlier rejection. 
------------------------------ Calculated amplitudes and estimated values of alpha and beta are used to compute the log-likelihood of the observed amplitude. The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764. Outliers are rejected on the basis of the assumption that a scaled log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed according to a Chi-square distribution (Q\" is equal to the second derivative of the log likelihood function of the mode of the distribution). The outlier threshold of the p-value relates to the p-value of the extreme value distribution of the chi-square distribution. """

    # NOTE(review): the return values of map_to_asu() are discarded here;
    # if it returns a new array instead of working in place these three
    # calls have no effect -- confirm before changing.
    flags.map_to_asu()
    ll_gain.map_to_asu()
    p_values.map_to_asu()

    assert flags.indices().all_eq(self.miller_obs.indices())
    assert ll_gain.indices().all_eq(self.miller_obs.indices())
    assert p_values.indices().all_eq(self.miller_obs.indices())

    log_message = self.make_log_model(
        log_message, flags, ll_gain, p_values,
        obs_norm, calc_norm, sigmaa, plot_out)

    # Build the report in a buffer.  write() replaces the Python-2-only
    # "print >> stream" statement; each entry is one output line.
    tmp_log = StringIO()
    for line in (
        log_message,
        # histogram of log likelihood gain values
        "",
        "The histoghram of scaled (LL-gain) values is shown below.",
        " Note: scaled (LL-gain) is approximately Chi-square distributed.",
        "",
        " scaled(LL-gain) Frequency",
    ):
        tmp_log.write("%s\n" % line)
    histo = flex.histogram(ll_gain.data(), 15)
    histo.show(f=tmp_log, format_cutoffs='%7.3f')

    self.out.write("%s\n" % tmp_log.getvalue())

    if not return_data:
        return flags
    assert flags.indices().all_eq(self.miller_obs.indices())
    return self.miller_obs.select(flags.data())
def model_based_outliers(self, f_model, level=0.01, return_data=False, plot_out=None):
    """Flag reflections by a likelihood-ratio test against ``f_model``.

    A ``sigmaa_estimator`` is built from ``self.miller_obs``, ``f_model`` and
    ``self.r_free_flags``; its normalized observed/calculated values and
    sigmaA estimates feed ``scaling.likelihood_ratio_outlier_test``.  The
    standardized likelihood ``g`` of each reflection is turned into a p-value
    ``1 - erf(sqrt(g / 2))**N`` (``N`` = number of reflections), and a flag
    is True when that p-value is greater than ``level``.

    A report from ``self.make_log_model`` plus a histogram of the
    standardized likelihood values is written to ``self.out``.

    Args:
        f_model: Calculated data passed to the estimator as ``miller_calc``.
        level: p-value threshold.
        return_data: When False, return the flag array; otherwise return
            ``self.miller_obs`` restricted to the entries flagged True.
        plot_out: Forwarded unchanged to ``self.make_log_model``.
    """
    assert self.r_free_flags is not None
    if self.r_free_flags.data().count(True) == 0:
        # No entry is flagged True: replace the flags by their complement.
        self.r_free_flags = self.r_free_flags.array(
            data=~self.r_free_flags.data())

    sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
        miller_obs=self.miller_obs,
        miller_calc=f_model,
        r_free_flags=self.r_free_flags,
        kernel_width_free_reflections=200,
        n_sampling_points=20,
        n_chebyshev_terms=13)
    sigmaa_estimator.show(out=self.out)
    sigmaa = sigmaa_estimator.sigmaa()
    obs_norm = abs(sigmaa_estimator.normalized_obs)
    calc_norm = sigmaa_estimator.normalized_calc

    f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
        f_obs=obs_norm.data(),
        sigma_obs=None,
        f_calc=calc_norm.data(),
        # the data is prenormalized, all epsilons are unity
        epsilon=flex.double(calc_norm.data().size(), 1.0),
        centric=obs_norm.centric_flags().data(),
        alpha=sigmaa.data(),
        beta=1.0 - sigmaa.data() * sigmaa.data())

    # NOTE(review): these four results are never used below; the calls are
    # kept in case the accessors have side effects -- confirm and remove.
    modes = f_model_outlier_object.posterior_mode()
    lik = f_model_outlier_object.log_likelihood()
    p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
    s_der = f_model_outlier_object.posterior_mode_snd_der()

    ll_gain = f_model_outlier_object.standardized_likelihood()

    # The smallest value should be 0, but numerical noise can make it come
    # out slightly negative; clamp everything below eps up to eps.
    # NOTE(review): set_selected presumably modifies ll_gain in place, so
    # the clamped values would also be what is reported below -- confirm.
    eps = 1.0e-10
    below_eps = flex.bool(ll_gain < eps)
    clamped_gain = ll_gain.set_selected(below_eps, eps)
    single_cdf = erf(flex.sqrt(clamped_gain / 2.0))
    p_values = 1.0 - flex.pow(single_cdf, float(single_cdf.size()))

    # select on p-values
    flags = flex.bool(p_values > level)
    flags = self.miller_obs.customized_copy(data=flags)
    ll_gain = self.miller_obs.customized_copy(data=ll_gain)
    p_values = self.miller_obs.customized_copy(data=p_values)

    log_message = """ Model based outlier rejection. 
------------------------------ Calculated amplitudes and estimated values of alpha and beta are used to compute the log-likelihood of the observed amplitude. The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764. Outliers are rejected on the basis of the assumption that a scaled log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed according to a Chi-square distribution (Q\" is equal to the second derivative of the log likelihood function of the mode of the distribution). The outlier threshold of the p-value relates to the p-value of the extreme value distribution of the chi-square distribution. """

    # NOTE(review): the return values of map_to_asu() are discarded here;
    # if it returns a new array instead of working in place these three
    # calls have no effect -- confirm before changing.
    flags.map_to_asu()
    ll_gain.map_to_asu()
    p_values.map_to_asu()

    assert flags.indices().all_eq(self.miller_obs.indices())
    assert ll_gain.indices().all_eq(self.miller_obs.indices())
    assert p_values.indices().all_eq(self.miller_obs.indices())

    log_message = self.make_log_model(
        log_message, flags, ll_gain, p_values,
        obs_norm, calc_norm, sigmaa, plot_out)

    # Build the report in a buffer.  write() replaces the Python-2-only
    # "print >> stream" statement; each entry is one output line.
    tmp_log = StringIO()
    for line in (
        log_message,
        # histogram of log likelihood gain values
        "",
        "The histoghram of scaled (LL-gain) values is shown below.",
        " Note: scaled (LL-gain) is approximately Chi-square distributed.",
        "",
        " scaled(LL-gain) Frequency",
    ):
        tmp_log.write("%s\n" % line)
    histo = flex.histogram(ll_gain.data(), 15)
    histo.show(f=tmp_log, format_cutoffs="%7.3f")

    self.out.write("%s\n" % tmp_log.getvalue())

    if not return_data:
        return flags
    assert flags.indices().all_eq(self.miller_obs.indices())
    return self.miller_obs.select(flags.data())