def model_based_outliers(self, f_model, level=.01, return_data=False, plot_out=None):
    """Flag reflections by a likelihood-ratio test against a model.

    A sigmaA estimator is built from ``self.miller_obs``, ``f_model`` and
    ``self.r_free_flags``; its normalized amplitudes and sigmaA values feed
    ``scaling.likelihood_ratio_outlier_test``.  The standardized likelihood
    of every reflection is turned into a p-value and compared with ``level``.

    Parameters:
      f_model     -- passed to the sigmaA estimator as ``miller_calc``.
      level       -- threshold; a reflection is flagged True when its
                     p-value is greater than ``level``.
      return_data -- when true, return ``self.miller_obs`` restricted to the
                     reflections flagged True; otherwise return the flags.
      plot_out    -- forwarded unchanged to ``self.make_log_model``.

    Side effects: ``self.r_free_flags`` is replaced by its inverse when it
    contains no True entry, and a report is written to ``self.out``.
    """
    assert self.r_free_flags is not None

    # If no flag is set at all, invert the flags so that all of them are set.
    n_flagged = self.r_free_flags.data().count(True)
    if n_flagged == 0:
        inverted = ~self.r_free_flags.data()
        self.r_free_flags = self.r_free_flags.array(data=inverted)

    estimator = sigmaa_estimation.sigmaa_estimator(
        miller_obs=self.miller_obs,
        miller_calc=f_model,
        r_free_flags=self.r_free_flags,
        kernel_width_free_reflections=200,
        n_sampling_points=20,
        n_chebyshev_terms=13)
    estimator.show(out=self.out)

    sigmaa = estimator.sigmaa()
    obs_norm = abs(estimator.normalized_obs)
    calc_norm = estimator.normalized_calc

    # the data is prenormalized, all epsies are unity
    unit_epsilon = flex.double(calc_norm.data().size(), 1.0)
    alpha = sigmaa.data()
    beta = 1.0 - sigmaa.data() * sigmaa.data()
    outlier_test = scaling.likelihood_ratio_outlier_test(
        f_obs=obs_norm.data(),
        sigma_obs=None,
        f_calc=calc_norm.data(),
        epsilon=unit_epsilon,
        centric=obs_norm.centric_flags().data(),
        alpha=alpha,
        beta=beta)

    # NOTE(review): the next four results are not used further down in this
    # method; the calls are kept so that behaviour stays exactly as before.
    modes = outlier_test.posterior_mode()
    lik = outlier_test.log_likelihood()
    p_lik = outlier_test.posterior_mode_log_likelihood()
    s_der = outlier_test.posterior_mode_snd_der()

    ll_gain = outlier_test.standardized_likelihood()

    # The smallest value should be 0.  Sometimes, due to numerical issues,
    # it comes out a wee bit negative; everything below eps is set to eps.
    # NOTE(review): set_selected is called on ll_gain itself, so ll_gain may
    # be modified in place as well -- confirm against the flex API.
    eps = 1.0e-10
    below_eps = flex.bool(ll_gain < eps)
    clamped = ll_gain.set_selected(below_eps, eps)
    single_p = erf(flex.sqrt(clamped / 2.0))
    n_values = float(single_p.size())
    p_values = 1.0 - flex.pow(single_p, n_values)

    # select on p-values; wrap all three result arrays like self.miller_obs
    flags = self.miller_obs.customized_copy(data=flex.bool(p_values > level))
    ll_gain = self.miller_obs.customized_copy(data=ll_gain)
    p_values = self.miller_obs.customized_copy(data=p_values)

    log_message = """ Model based outlier rejection. 
------------------------------ Calculated amplitudes and estimated values of alpha and beta are used to compute the log-likelihood of the observed amplitude. The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764. Outliers are rejected on the basis of the assumption that a scaled log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed according to a Chi-square distribution (Q\" is equal to the second derivative of the log likelihood function of the mode of the distribution). The outlier threshold of the p-value relates to the p-value of the extreme value distribution of the chi-square distribution. """

    # NOTE(review): the return values of map_to_asu() are discarded; if it
    # returns a new array rather than working in place these calls have no
    # effect -- confirm against the miller array API.
    flags.map_to_asu()
    ll_gain.map_to_asu()
    p_values.map_to_asu()

    reference_indices_ok = flags.indices().all_eq(self.miller_obs.indices())
    assert reference_indices_ok
    assert ll_gain.indices().all_eq(self.miller_obs.indices())
    assert p_values.indices().all_eq(self.miller_obs.indices())

    log_message = self.make_log_model(
        log_message, flags, ll_gain, p_values,
        obs_norm, calc_norm, sigmaa, plot_out)

    # The report is assembled in memory first and written to self.out in one
    # go.  (Python 2 "print >> stream" syntax, as used by this file.)
    tmp_log = StringIO()
    print >> tmp_log, log_message
    # histogram of log likelihood gain values
    print >> tmp_log
    print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
    print >> tmp_log, " Note: scaled (LL-gain) is approximately Chi-square distributed."
    print >> tmp_log
    print >> tmp_log, " scaled(LL-gain) Frequency"
    histo = flex.histogram(ll_gain.data(), 15)
    histo.show(f=tmp_log, format_cutoffs='%7.3f')
    print >> self.out, tmp_log.getvalue()

    if return_data:
        assert flags.indices().all_eq(self.miller_obs.indices())
        return self.miller_obs.select(flags.data())
    return flags
def calc_sigmaa(obs, model, flag):
    """Build and return a sigmaa_estimator for the given inputs.

    ``obs``, ``model`` and ``flag`` are handed to
    ``mmtbx.scaling.sigmaa_estimation.sigmaa_estimator`` as its first three
    positional arguments, with ``kernel_width_free_reflections`` fixed at 100.
    The estimator object itself is returned.
    """
    # Imported locally, so mmtbx is only required when this function runs.
    from mmtbx.scaling.sigmaa_estimation import sigmaa_estimator

    return sigmaa_estimator(
        obs,
        model,
        flag,
        kernel_width_free_reflections=100,
    )
def model_based_outliers(self, f_model, level=0.01, return_data=False, plot_out=None):
    """Run the model-based (likelihood-ratio) outlier test.

    Steps, in order:
      1. estimate sigmaA from ``self.miller_obs``, ``f_model`` and
         ``self.r_free_flags`` (the flags are inverted first when none of
         them is True);
      2. run ``scaling.likelihood_ratio_outlier_test`` on the normalized
         observed and calculated amplitudes;
      3. convert the standardized likelihood into p-values and flag every
         reflection whose p-value exceeds ``level``;
      4. write a report, including a histogram, to ``self.out``.

    ``plot_out`` is only passed on to ``self.make_log_model``.

    Returns the flags array, or, when ``return_data`` is true,
    ``self.miller_obs`` selected by those flags.
    """
    assert self.r_free_flags is not None

    has_no_true_flag = self.r_free_flags.data().count(True) == 0
    if has_no_true_flag:
        # Nothing is flagged True: swap the meaning of the flags.
        flipped = ~self.r_free_flags.data()
        self.r_free_flags = self.r_free_flags.array(data=flipped)

    sa_est = sigmaa_estimation.sigmaa_estimator(
        miller_obs=self.miller_obs,
        miller_calc=f_model,
        r_free_flags=self.r_free_flags,
        kernel_width_free_reflections=200,
        n_sampling_points=20,
        n_chebyshev_terms=13,
    )
    sa_est.show(out=self.out)
    sigmaa = sa_est.sigmaa()
    obs_norm = abs(sa_est.normalized_obs)
    calc_norm = sa_est.normalized_calc

    lr_test = scaling.likelihood_ratio_outlier_test(
        f_obs=obs_norm.data(),
        sigma_obs=None,
        f_calc=calc_norm.data(),
        # the data is prenormalized, all epsies are unity
        epsilon=flex.double(calc_norm.data().size(), 1.0),
        centric=obs_norm.centric_flags().data(),
        alpha=sigmaa.data(),
        beta=1.0 - sigmaa.data() * sigmaa.data(),
    )

    # NOTE(review): these four values are never read again in this method.
    # The calls are retained unchanged to keep behaviour identical.
    modes = lr_test.posterior_mode()
    lik = lr_test.log_likelihood()
    p_lik = lr_test.posterior_mode_log_likelihood()
    s_der = lr_test.posterior_mode_snd_der()

    ll_gain = lr_test.standardized_likelihood()

    # The smallest value should be 0, but numerical noise can push it a wee
    # bit negative.  Repair that by lifting everything below eps up to eps.
    # NOTE(review): p_values starts out as the same object as ll_gain, so
    # this may also change ll_gain -- verify against the flex API.
    eps = 1.0e-10
    needs_repair = flex.bool(ll_gain < eps)
    p_values = ll_gain
    p_values = p_values.set_selected(needs_repair, eps)
    p_values = erf(flex.sqrt(p_values / 2.0))
    exponent = float(p_values.size())
    p_values = 1.0 - flex.pow(p_values, exponent)

    # select on p-values
    above_level = flex.bool(p_values > level)
    flags = self.miller_obs.customized_copy(data=above_level)
    ll_gain = self.miller_obs.customized_copy(data=ll_gain)
    p_values = self.miller_obs.customized_copy(data=p_values)

    log_message = """ Model based outlier rejection. 
------------------------------ Calculated amplitudes and estimated values of alpha and beta are used to compute the log-likelihood of the observed amplitude. The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764. Outliers are rejected on the basis of the assumption that a scaled log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed according to a Chi-square distribution (Q\" is equal to the second derivative of the log likelihood function of the mode of the distribution). The outlier threshold of the p-value relates to the p-value of the extreme value distribution of the chi-square distribution. """

    results = (flags, ll_gain, p_values)
    # NOTE(review): map_to_asu() results are thrown away here; presumably
    # harmless, but confirm whether the method works in place.
    for result in results:
        result.map_to_asu()
    # All three arrays must still be index-for-index aligned with miller_obs.
    for result in results:
        assert result.indices().all_eq(self.miller_obs.indices())

    log_message = self.make_log_model(
        log_message,
        flags,
        ll_gain,
        p_values,
        obs_norm,
        calc_norm,
        sigmaa,
        plot_out,
    )

    # Collect the whole report in a buffer, then emit it once to self.out.
    # (Python 2 "print >> stream" syntax, as used by this file.)
    tmp_log = StringIO()
    print >> tmp_log, log_message
    # histogram of log likelihood gain values
    print >> tmp_log
    print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
    print >> tmp_log, " Note: scaled (LL-gain) is approximately Chi-square distributed."
    print >> tmp_log
    print >> tmp_log, " scaled(LL-gain) Frequency"
    ll_gain_histogram = flex.histogram(ll_gain.data(), 15)
    ll_gain_histogram.show(f=tmp_log, format_cutoffs="%7.3f")
    print >> self.out, tmp_log.getvalue()

    if return_data:
        assert flags.indices().all_eq(self.miller_obs.indices())
        return self.miller_obs.select(flags.data())
    return flags