def __init__(self, hooft_analysis, use_students_t_distribution=False,
             students_t_nu=None, probability_plot_slope=None):
    """Prepare probability-plot data from the Bijvoet differences of an analysis.

    The observed and calculated differences (``delta_fo2`` / ``delta_fc2``)
    of ``hooft_analysis`` are expanded to their Bijvoet mates and split with
    ``hemispheres_acentrics()``; the second hemisphere is appended to the
    first with its data multiplied by -1.  The deviations
    ``(G * delta_fc2 - delta_fo2) / sigma`` are then sorted and stored as
    ``self.y``, with the matching distribution quantiles stored as ``self.x``.

    Parameters:
      hooft_analysis: object providing ``delta_fo2``, ``delta_fc2`` and ``G``.
      use_students_t_distribution: when true the quantiles come from a
        Student's t distribution, otherwise from a normal distribution.
      students_t_nu: parameter handed to ``students_t_distribution``; when
        None (and the t distribution is requested) it is obtained from
        ``maximise_students_t_correlation_coefficient`` called with the
        sorted deviations and the bounds 1 and 200.
      probability_plot_slope: when not None, the deviations are divided by
        this value before sorting.

    Sets ``delta_fo2``, ``delta_fc2``, ``indices``, ``distribution``, ``x``,
    ``y``, ``fit`` and ``correlation`` on the instance, and asserts that the
    linear fit is well defined.
    """
    fo2_mates = hooft_analysis.delta_fo2.generate_bijvoet_mates()
    self.delta_fo2, minus_fo2 = fo2_mates.hemispheres_acentrics()
    fc2_mates = hooft_analysis.delta_fc2.generate_bijvoet_mates()
    self.delta_fc2, minus_fc2 = fc2_mates.hemispheres_acentrics()

    # we want to plot both hemispheres
    self.delta_fo2.indices().extend(minus_fo2.indices())
    self.delta_fo2.data().extend(minus_fo2.data() * -1)
    self.delta_fo2.sigmas().extend(minus_fo2.sigmas())
    self.delta_fc2.indices().extend(minus_fc2.indices())
    self.delta_fc2.data().extend(minus_fc2.data() * -1)
    self.indices = self.delta_fo2.indices()

    scaled_fc2 = hooft_analysis.G * self.delta_fc2.data()
    observed_deviations = (
        (scaled_fc2 - self.delta_fo2.data()) / self.delta_fo2.sigmas())
    if probability_plot_slope is not None:
        observed_deviations /= probability_plot_slope

    # Reorder the deviations according to the permutation that sorts them.
    order = flex.sort_permutation(observed_deviations)
    observed_deviations = observed_deviations.select(order)

    if not use_students_t_distribution:
        self.distribution = distributions.normal_distribution()
    else:
        if students_t_nu is None:
            students_t_nu = maximise_students_t_correlation_coefficient(
                observed_deviations, 1, 200)
        self.distribution = distributions.students_t_distribution(
            students_t_nu)

    self.x = self.distribution.quantiles(observed_deviations.size())
    self.y = observed_deviations

    # The first and last five points are left out of the fit and correlation.
    self.fit = flex.linear_regression(self.x[5:-5], self.y[5:-5])
    self.correlation = flex.linear_correlation(self.x[5:-5], self.y[5:-5])
    assert self.fit.is_well_defined()
def absolute_structure_analysis(xs, fo2, fc, scale, nu=None, log=None,
                                outlier_cutoff_factor=None):
    """Run the absolute-structure analyses and write their reports to ``log``.

    In order, this performs a ``hooft_analysis`` (reported as "Gaussian
    analysis"), a probability plot of its Bijvoet differences, a
    ``students_t_hooft_analysis`` with its own probability plot and, when
    ``xs`` is given, a ``flack_analysis``.

    Parameters:
      xs: structure passed to ``absolute_structure.flack_analysis``; when
        None the Flack analysis is skipped.
      fo2: observed data; also converted with ``as_xray_observations()`` for
        the Flack analysis.
      fc: calculated data passed to both Hooft analyses.
      scale: forwarded as ``scale_factor``.
      nu: Student's t parameter; when None it is obtained from
        ``maximise_students_t_correlation_coefficient`` on the probability
        plot deviations with ``min_nu=1`` and ``max_nu=200``.
      log: object with a ``write`` method receiving the report; defaults to
        ``sys.stdout``.
      outlier_cutoff_factor: forwarded to both Hooft analyses.

    Returns None.

    The report lines are emitted with ``log.write`` rather than the
    ``print >> log`` statement: that statement only works on Python 2 and
    raises TypeError at run time on Python 3, whereas ``write`` produces the
    same text on both.
    """
    if log is None:
        log = sys.stdout

    hooft_analysis = absolute_structure.hooft_analysis(
        fo2, fc, scale_factor=scale,
        outlier_cutoff_factor=outlier_cutoff_factor)
    log.write("Gaussian analysis:\n")
    hooft_analysis.show(out=log)

    NPP = absolute_structure.bijvoet_differences_probability_plot(
        hooft_analysis)
    log.write("Probability plot:\n")
    NPP.show(out=log)
    log.write("\n")

    if nu is None:
        nu = absolute_structure.maximise_students_t_correlation_coefficient(
            NPP.y, min_nu=1, max_nu=200)

    # The slope of the t-distribution probability plot (first and last five
    # points excluded) is handed to the Student's t analysis.
    distribution = distributions.students_t_distribution(nu)
    observed_deviations = NPP.y
    expected_deviations = distribution.quantiles(observed_deviations.size())
    fit = flex.linear_regression(
        expected_deviations[5:-5], observed_deviations[5:-5])

    t_analysis = absolute_structure.students_t_hooft_analysis(
        fo2, fc, nu, scale_factor=scale,
        probability_plot_slope=fit.slope(),
        outlier_cutoff_factor=outlier_cutoff_factor)
    tPP = absolute_structure.bijvoet_differences_probability_plot(
        t_analysis, use_students_t_distribution=True, students_t_nu=nu)
    log.write("Student's t analysis:\n")
    log.write("nu: %.2f\n" % nu)
    t_analysis.show(out=log)
    log.write("Probability plot:\n")
    tPP.show(out=log)
    log.write("\n")

    if xs is not None:
        flack = absolute_structure.flack_analysis(
            xs, fo2.as_xray_observations())
        flack.show(out=log)
def compute_cc_significance(r, n, p):
    """Compare a correlation coefficient against a Student's t critical value.

    Parameters:
      r: correlation coefficient to test.
      n: number of observations; the t distribution is built with ``n - 2``.
      p: the critical value is derived from the ``1 - p`` quantile of that
        distribution (presumably the significance level -- confirm with
        callers).

    Returns a ``(significance, critical_value)`` tuple.  When ``r == -1`` or
    ``n <= 2`` no distribution is evaluated and ``(False, 0)`` is returned;
    otherwise ``significance`` is ``r > critical_value``.
    """
    # https://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#Testing_using_Student.27s_t-distribution
    if r == -1 or n <= 2:
        return False, 0

    from scitbx.math import distributions

    degrees_of_freedom = n - 2
    t_dist = distributions.students_t_distribution(degrees_of_freedom)
    t = t_dist.quantile(1 - p)
    critical_value = t / sqrt(n - 2 + t**2)
    return r > critical_value, critical_value
def absolute_structure_analysis(xs, fo2, fc, scale, nu=None, log=None,
                                outlier_cutoff_factor=None):
    """Write Hooft, Student's t and (optionally) Flack analysis reports.

    Steps, each reported to ``log``:
      1. ``absolute_structure.hooft_analysis`` ("Gaussian analysis") and the
         probability plot of its Bijvoet differences.
      2. ``absolute_structure.students_t_hooft_analysis`` and its
         probability plot, using ``nu`` and the slope of a linear regression
         of the observed against the expected t-distribution deviations.
      3. ``absolute_structure.flack_analysis``, only when ``xs`` is not None.

    Parameters:
      xs: structure for the Flack analysis, or None to skip it.
      fo2: observed data (``as_xray_observations()`` is called on it for the
        Flack analysis).
      fc: calculated data.
      scale: passed as ``scale_factor`` to both Hooft analyses.
      nu: Student's t parameter; when None it is chosen by
        ``maximise_students_t_correlation_coefficient`` with ``min_nu=1``
        and ``max_nu=200``.
      log: writable stream for the report, ``sys.stdout`` when None.
      outlier_cutoff_factor: passed through to both Hooft analyses.

    Returns None.

    Output uses ``log.write`` because the Python 2 only ``print >> log``
    statement fails with TypeError when executed by Python 3; the text
    written is unchanged.
    """
    if log is None:
        log = sys.stdout

    hooft_analysis = absolute_structure.hooft_analysis(
        fo2, fc, scale_factor=scale,
        outlier_cutoff_factor=outlier_cutoff_factor)
    log.write("Gaussian analysis:\n")
    hooft_analysis.show(out=log)

    NPP = absolute_structure.bijvoet_differences_probability_plot(
        hooft_analysis)
    log.write("Probability plot:\n")
    NPP.show(out=log)
    log.write("\n")

    if nu is None:
        nu = absolute_structure.maximise_students_t_correlation_coefficient(
            NPP.y, min_nu=1, max_nu=200)

    distribution = distributions.students_t_distribution(nu)
    observed_deviations = NPP.y
    expected_deviations = distribution.quantiles(observed_deviations.size())
    # The regression leaves out the first and last five points.
    fit = flex.linear_regression(
        expected_deviations[5:-5], observed_deviations[5:-5])

    t_analysis = absolute_structure.students_t_hooft_analysis(
        fo2, fc, nu, scale_factor=scale,
        probability_plot_slope=fit.slope(),
        outlier_cutoff_factor=outlier_cutoff_factor)
    tPP = absolute_structure.bijvoet_differences_probability_plot(
        t_analysis, use_students_t_distribution=True, students_t_nu=nu)
    log.write("Student's t analysis:\n")
    log.write("nu: %.2f\n" % nu)
    t_analysis.show(out=log)
    log.write("Probability plot:\n")
    tPP.show(out=log)
    log.write("\n")

    if xs is not None:
        flack = absolute_structure.flack_analysis(
            xs, fo2.as_xray_observations())
        flack.show(out=log)
from cctbx.array_family import flex
from iotbx import csv_utils
import libtbx
import libtbx.utils
from libtbx.test_utils import approx_equal
import scitbx.random
from scitbx.random import variate, normal_distribution, gamma_distribution
from scitbx.math import distributions
from smtbx import absolute_structure

# Probe once, at import time, whether Student's t distribution can be built;
# the result is recorded in ``students_t_available``.
try:
    distributions.students_t_distribution(1)
except RuntimeError as e:
    # XXX Student's t distribution is not supported with GCC 3.2 builds
    if not str(e).startswith("Implementation not available in this build."):
        # Any other RuntimeError is a genuine failure.
        raise RuntimeError(e)
    students_t_available = False
    print("Skipping exercise_hooft_analysis() with Student's t distribution.")
else:
    students_t_available = True


class test_case(object):
    """Class-level settings visible in this excerpt of the test case."""

    d_min = 1
    use_students_t_errors = False
    # The four-element pattern repeated five times (20 entries).
    elements = ("N", "C", "C", "S") * 5
from cctbx.array_family import flex
from iotbx import csv_utils
import libtbx
import libtbx.utils
from libtbx.test_utils import approx_equal
import scitbx.random
from scitbx.random import variate, normal_distribution, gamma_distribution
from scitbx.math import distributions
from smtbx import absolute_structure

# Determine at import time whether Student's t distribution is usable and
# record the answer in ``students_t_available``.
# ``except ... as e`` and the parenthesised ``print`` are used because the
# older ``except RuntimeError, e`` / ``print "..."`` forms are syntax errors
# on Python 3; the forms below are accepted by Python 2.6+ and Python 3.
try:
    distributions.students_t_distribution(1)
except RuntimeError as e:
    # XXX Student's t distribution is not supported with GCC 3.2 builds
    if str(e).startswith("Implementation not available in this build."):
        students_t_available = False
        print("Skipping exercise_hooft_analysis() with Student's t distribution.")
    else:
        raise RuntimeError(e)
else:
    students_t_available = True


class test_case(object):
    """Class-level settings visible in this excerpt of the test case."""

    d_min = 1
    use_students_t_errors = False
    # The four-element pattern repeated five times (20 entries).
    elements = ("N", "C", "C", "S") * 5
def compute_corr_coeff(i):
    """Correlate the observed deviations with Student's t quantiles.

    ``i`` is the parameter given to ``students_t_distribution``.
    ``observed_deviations`` is not an argument: it is read from the
    surrounding scope.  The first and last five entries of both sequences
    are excluded before the correlation is computed.

    Returns the result of ``flex.linear_correlation`` for the trimmed
    observed and expected deviations.
    """
    t_dist = distributions.students_t_distribution(i)
    n_points = observed_deviations.size()
    expected = t_dist.quantiles(n_points)
    trimmed_observed = observed_deviations[5:-5]
    trimmed_expected = expected[5:-5]
    return flex.linear_correlation(trimmed_observed, trimmed_expected)